From 36602c058ef824f1ceac120cd8c3f3d1daf469ad Mon Sep 17 00:00:00 2001 From: tjacovich Date: Wed, 1 Nov 2023 15:57:53 -0400 Subject: [PATCH 1/5] Add uuid7 to PipelineUtils. --- SciXPipelineUtils/scix_uuid.py | 98 ++++++++++++++++++++++++++++++++++ tests/test_scix_uuid.py | 10 ++++ 2 files changed, 108 insertions(+) create mode 100644 SciXPipelineUtils/scix_uuid.py create mode 100644 tests/test_scix_uuid.py diff --git a/SciXPipelineUtils/scix_uuid.py b/SciXPipelineUtils/scix_uuid.py new file mode 100644 index 0000000..56ea4a7 --- /dev/null +++ b/SciXPipelineUtils/scix_uuid.py @@ -0,0 +1,98 @@ +""" +Pulled from prototype code: +https://github.com/uuid6/prototypes/commit/475ad927455a2d35aaade518a1928aec93d78a5c +""" + +import random +import time +from uuid import * + +#These are needed to keep the semi-sequential nature of the UUIDs +sequenceCounter = 0 +_last_v7timestamp = 0 +_last_uuid_int = 0 +_last_sequence = None +uuidVariant = '10' + + +def uuid7(): + """Generates a 128-bit version 7 UUID with nanoseconds precision timestamp and random node + + example: 061cdd23-93a0-73df-a200-6ff3e72d92e9 + + format: unixts|subsec_a|version|subsec_b|variant|subsec_seq_node + + :param returnType: bin, int, hex + :return: bin, int, hex + """ + + global _last_v7timestamp + global _last_uuid_int + global _last_sequence + global sequenceCounter + global uuidVariant + uuidVersion = '0111' # ver 7 + sec_bits = 36 # unixts at second precision + subsec_bits = 30 # Enough to represent NS + version_bits = 4 # '0111' for ver 7 + variant_bits = 2 # '10' Static for UUID + sequence_bits = 8 # Enough for 256 UUIDs per NS + node_bits = (128 - sec_bits - subsec_bits - version_bits - variant_bits - sequence_bits) # 48 + + ### Timestamp Work + # Produces unix epoch with nanosecond precision + timestamp = time.time_ns() # Produces 64-bit NS timestamp + # Subsecond Math + subsec_decimal_digits = 9 # Last 9 digits of are subsection precision + subsec_decimal_divisor = (10 ** subsec_decimal_digits) # 1000000000 NS in 1 second + integer_part = int(timestamp / subsec_decimal_divisor) # Get seconds + sec = integer_part + # Conversion to decimal + fractional_part = round((timestamp % subsec_decimal_divisor) / subsec_decimal_divisor, subsec_decimal_digits) + subsec = round(fractional_part * (2 ** subsec_bits)) # Convert to 30 bit int, round + + ### Binary Conversions + ### Need subsec_a (12 bits), subsec_b (12-bits), and subsec_c (leftover bits starting subsec_seq_node) + unixts = f'{sec:036b}' + subsec_binary = f'{subsec:030b}' + subsec_a = subsec_binary[:12] # Upper 12 + subsec_b_c = subsec_binary[-18:] # Lower 18 + subsec_b = subsec_b_c[:12] # Upper 12 + subsec_c = subsec_binary[-6:] # Lower 6 + + ### Sequence Work + # Sequence starts at 0, increments if timestamp is the same, the sequence increments by 1 + # Resets if timestamp int is larger than _last_v7timestamp used for UUID generation + # Will be 8 bits for NS timestamp + if timestamp <= _last_v7timestamp: + sequenceCounter = int(sequenceCounter) + 1 + + if timestamp > _last_v7timestamp: + sequenceCounter = 0 + + sequenceCounterBin = f'{sequenceCounter:08b}' + + # Set these two before moving on + _last_v7timestamp = timestamp + _last_sequence = int(sequenceCounter) + + ### Random Node Work + randomInt = random.getrandbits(node_bits) + randomBinary = f'{randomInt:048b}' + + # Create subsec_seq_node + subsec_seq_node = subsec_c + sequenceCounterBin + randomBinary + + ### Formatting Work + # Bin merge and Int creation + UUIDv7_bin = unixts + subsec_a + uuidVersion + subsec_b + uuidVariant + subsec_seq_node + UUIDv7_int = int(UUIDv7_bin, 2) + + _last_uuid_int = UUIDv7_int + + # Convert Hex to Int then splice in dashes + UUIDv7_hex = f'{UUIDv7_int:032x}' # int to hex + UUIDv7_formatted = '-'.join( + [UUIDv7_hex[:8], UUIDv7_hex[8:12], UUIDv7_hex[12:16], UUIDv7_hex[16:20], UUIDv7_hex[20:32]]) + + UUID(UUIDv7_formatted) diff --git a/tests/test_scix_uuid.py b/tests/test_scix_uuid.py new file mode 100644 index 0000000..d9c58ba --- /dev/null +++ b/tests/test_scix_uuid.py @@ -0,0 +1,10 @@ +from unittest import TestCase +import scix_uuid +import uuid + +class TestSciXUUIDImplementation(TestCase): + def generate_uuid7(self): + test_uuid = scix_uuid.uuid7() + self.assertEqual(type(test_uuid), uuid.UUID) + self.assertEqual(type(test_uuid.hex), str) + self.assertEqual(len(test_uuid.bytes), 16) \ No newline at end of file From d1bf60ac49ff61a286152ada4f1877265c446e91 Mon Sep 17 00:00:00 2001 From: tjacovich Date: Wed, 1 Nov 2023 16:01:42 -0400 Subject: [PATCH 2/5] pre-commit updates. --- SciXPipelineUtils/scix_uuid.py | 55 ++++++++++++++++++---------------- tests/test_scix_uuid.py | 6 ++-- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/SciXPipelineUtils/scix_uuid.py b/SciXPipelineUtils/scix_uuid.py index 56ea4a7..86b32a3 100644 --- a/SciXPipelineUtils/scix_uuid.py +++ b/SciXPipelineUtils/scix_uuid.py @@ -7,12 +7,12 @@ import time from uuid import * -#These are needed to keep the semi-sequential nature of the UUIDs +# These are needed to keep the semi-sequential nature of the UUIDs sequenceCounter = 0 _last_v7timestamp = 0 _last_uuid_int = 0 _last_sequence = None -uuidVariant = '10' +uuidVariant = "10" def uuid7(): @@ -31,34 +31,36 @@ def uuid7(): global _last_sequence global sequenceCounter global uuidVariant - uuidVersion = '0111' # ver 7 - sec_bits = 36 # unixts at second precision - subsec_bits = 30 # Enough to represent NS - version_bits = 4 # '0111' for ver 7 - variant_bits = 2 # '10' Static for UUID - sequence_bits = 8 # Enough for 256 UUIDs per NS - node_bits = (128 - sec_bits - subsec_bits - version_bits - variant_bits - sequence_bits) # 48 + uuidVersion = "0111" # ver 7 + sec_bits = 36 # unixts at second precision + subsec_bits = 30 # Enough to represent NS + version_bits = 4 # '0111' for ver 7 + variant_bits = 2 # '10' Static for UUID + sequence_bits = 8 # Enough for 256 UUIDs per NS + node_bits = 128 - sec_bits - subsec_bits - version_bits - variant_bits - sequence_bits # 48 ### Timestamp Work # Produces unix epoch with nanosecond precision - timestamp = time.time_ns() # Produces 64-bit NS timestamp + timestamp = time.time_ns() # Produces 64-bit NS timestamp # Subsecond Math - subsec_decimal_digits = 9 # Last 9 digits of are subsection precision - subsec_decimal_divisor = (10 ** subsec_decimal_digits) # 1000000000 NS in 1 second - integer_part = int(timestamp / subsec_decimal_divisor) # Get seconds + subsec_decimal_digits = 9 # Last 9 digits of are subsection precision + subsec_decimal_divisor = 10**subsec_decimal_digits # 1000000000 NS in 1 second + integer_part = int(timestamp / subsec_decimal_divisor) # Get seconds sec = integer_part # Conversion to decimal - fractional_part = round((timestamp % subsec_decimal_divisor) / subsec_decimal_divisor, subsec_decimal_digits) - subsec = round(fractional_part * (2 ** subsec_bits)) # Convert to 30 bit int, round + fractional_part = round( + (timestamp % subsec_decimal_divisor) / subsec_decimal_divisor, subsec_decimal_digits + ) + subsec = round(fractional_part * (2**subsec_bits)) # Convert to 30 bit int, round ### Binary Conversions ### Need subsec_a (12 bits), subsec_b (12-bits), and subsec_c (leftover bits starting subsec_seq_node) - unixts = f'{sec:036b}' - subsec_binary = f'{subsec:030b}' - subsec_a = subsec_binary[:12] # Upper 12 - subsec_b_c = subsec_binary[-18:] # Lower 18 - subsec_b = subsec_b_c[:12] # Upper 12 - subsec_c = subsec_binary[-6:] # Lower 6 + unixts = f"{sec:036b}" + subsec_binary = f"{subsec:030b}" + subsec_a = subsec_binary[:12] # Upper 12 + subsec_b_c = subsec_binary[-18:] # Lower 18 + subsec_b = subsec_b_c[:12] # Upper 12 + subsec_c = subsec_binary[-6:] # Lower 6 ### Sequence Work # Sequence starts at 0, increments if timestamp is the same, the sequence increments by 1 @@ -70,7 +72,7 @@ def uuid7(): if timestamp > _last_v7timestamp: sequenceCounter = 0 - sequenceCounterBin = f'{sequenceCounter:08b}' + sequenceCounterBin = f"{sequenceCounter:08b}" # Set these two before moving on _last_v7timestamp = timestamp @@ -78,7 +80,7 @@ def uuid7(): ### Random Node Work randomInt = random.getrandbits(node_bits) - randomBinary = f'{randomInt:048b}' + randomBinary = f"{randomInt:048b}" # Create subsec_seq_node subsec_seq_node = subsec_c + sequenceCounterBin + randomBinary @@ -91,8 +93,9 @@ def uuid7(): _last_uuid_int = UUIDv7_int # Convert Hex to Int then splice in dashes - UUIDv7_hex = f'{UUIDv7_int:032x}' # int to hex - UUIDv7_formatted = '-'.join( - [UUIDv7_hex[:8], UUIDv7_hex[8:12], UUIDv7_hex[12:16], UUIDv7_hex[16:20], UUIDv7_hex[20:32]]) + UUIDv7_hex = f"{UUIDv7_int:032x}" # int to hex + UUIDv7_formatted = "-".join( + [UUIDv7_hex[:8], UUIDv7_hex[8:12], UUIDv7_hex[12:16], UUIDv7_hex[16:20], UUIDv7_hex[20:32]] + ) UUID(UUIDv7_formatted) diff --git a/tests/test_scix_uuid.py b/tests/test_scix_uuid.py index d9c58ba..8725bfb 100644 --- a/tests/test_scix_uuid.py +++ b/tests/test_scix_uuid.py @@ -1,10 +1,12 @@ +import uuid from unittest import TestCase + import scix_uuid -import uuid + class TestSciXUUIDImplementation(TestCase): def generate_uuid7(self): test_uuid = scix_uuid.uuid7() self.assertEqual(type(test_uuid), uuid.UUID) self.assertEqual(type(test_uuid.hex), str) - self.assertEqual(len(test_uuid.bytes), 16) \ No newline at end of file + self.assertEqual(len(test_uuid.bytes), 16) From 26604f973d31b849fb570d84ecd6e4316d56b8d8 Mon Sep 17 00:00:00 2001 From: tjacovich Date: Wed, 1 Nov 2023 16:42:58 -0400 Subject: [PATCH 3/5] Added flake8 compatible way to overload uuid module. --- SciXPipelineUtils/scix_uuid.py | 177 +++++++++++++++++---------------- 1 file changed, 89 insertions(+), 88 deletions(-) diff --git a/SciXPipelineUtils/scix_uuid.py b/SciXPipelineUtils/scix_uuid.py index 86b32a3..d137566 100644 --- a/SciXPipelineUtils/scix_uuid.py +++ b/SciXPipelineUtils/scix_uuid.py @@ -5,97 +5,98 @@ import random import time -from uuid import * - -# These are needed to keep the semi-sequential nature of the UUIDs -sequenceCounter = 0 -_last_v7timestamp = 0 -_last_uuid_int = 0 -_last_sequence = None -uuidVariant = "10" - - -def uuid7(): - """Generates a 128-bit version 7 UUID with nanoseconds precision timestamp and random node - - example: 061cdd23-93a0-73df-a200-6ff3e72d92e9 - - format: unixts|subsec_a|version|subsec_b|variant|subsec_seq_node - - :param returnType: bin, int, hex - :return: bin, int, hex - """ - - global _last_v7timestamp - global _last_uuid_int - global _last_sequence - global sequenceCounter - global uuidVariant - uuidVersion = "0111" # ver 7 - sec_bits = 36 # unixts at second precision - subsec_bits = 30 # Enough to represent NS - version_bits = 4 # '0111' for ver 7 - variant_bits = 2 # '10' Static for UUID - sequence_bits = 8 # Enough for 256 UUIDs per NS - node_bits = 128 - sec_bits - subsec_bits - version_bits - variant_bits - sequence_bits # 48 - - ### Timestamp Work - # Produces unix epoch with nanosecond precision - timestamp = time.time_ns() # Produces 64-bit NS timestamp - # Subsecond Math - subsec_decimal_digits = 9 # Last 9 digits of are subsection precision - subsec_decimal_divisor = 10**subsec_decimal_digits # 1000000000 NS in 1 second - integer_part = int(timestamp / subsec_decimal_divisor) # Get seconds - sec = integer_part - # Conversion to decimal - fractional_part = round( - (timestamp % subsec_decimal_divisor) / subsec_decimal_divisor, subsec_decimal_digits - ) - subsec = round(fractional_part * (2**subsec_bits)) # Convert to 30 bit int, round - - ### Binary Conversions - ### Need subsec_a (12 bits), subsec_b (12-bits), and subsec_c (leftover bits starting subsec_seq_node) - unixts = f"{sec:036b}" - subsec_binary = f"{subsec:030b}" - subsec_a = subsec_binary[:12] # Upper 12 - subsec_b_c = subsec_binary[-18:] # Lower 18 - subsec_b = subsec_b_c[:12] # Upper 12 - subsec_c = subsec_binary[-6:] # Lower 6 - - ### Sequence Work - # Sequence starts at 0, increments if timestamp is the same, the sequence increments by 1 - # Resets if timestamp int is larger than _last_v7timestamp used for UUID generation - # Will be 8 bits for NS timestamp - if timestamp <= _last_v7timestamp: - sequenceCounter = int(sequenceCounter) + 1 - - if timestamp > _last_v7timestamp: - sequenceCounter = 0 - - sequenceCounterBin = f"{sequenceCounter:08b}" +import uuid - # Set these two before moving on - _last_v7timestamp = timestamp - _last_sequence = int(sequenceCounter) - ### Random Node Work - randomInt = random.getrandbits(node_bits) - randomBinary = f"{randomInt:048b}" +class scix_uuid: + def uuid7(): + """Generates a 128-bit version 7 UUID with nanoseconds precision timestamp and random node - # Create subsec_seq_node - subsec_seq_node = subsec_c + sequenceCounterBin + randomBinary + example: 061cdd23-93a0-73df-a200-6ff3e72d92e9 - ### Formatting Work - # Bin merge and Int creation - UUIDv7_bin = unixts + subsec_a + uuidVersion + subsec_b + uuidVariant + subsec_seq_node - UUIDv7_int = int(UUIDv7_bin, 2) + format: unixts|subsec_a|version|subsec_b|variant|subsec_seq_node - _last_uuid_int = UUIDv7_int + :return: uuid.UUID + """ - # Convert Hex to Int then splice in dashes - UUIDv7_hex = f"{UUIDv7_int:032x}" # int to hex - UUIDv7_formatted = "-".join( - [UUIDv7_hex[:8], UUIDv7_hex[8:12], UUIDv7_hex[12:16], UUIDv7_hex[16:20], UUIDv7_hex[20:32]] - ) - - UUID(UUIDv7_formatted) + sequenceCounter = 0 + _last_v7timestamp = 0 + uuidVariant = "10" + + uuidVersion = "0111" # ver 7 + sec_bits = 36 # unixts at second precision + subsec_bits = 30 # Enough to represent NS + version_bits = 4 # '0111' for ver 7 + variant_bits = 2 # '10' Static for UUID + sequence_bits = 8 # Enough for 256 UUIDs per NS + node_bits = ( + 128 - sec_bits - subsec_bits - version_bits - variant_bits - sequence_bits + ) # 48 + + ### Timestamp Work + # Produces unix epoch with nanosecond precision + timestamp = time.time_ns() # Produces 64-bit NS timestamp + # Subsecond Math + subsec_decimal_digits = 9 # Last 9 digits of are subsection precision + subsec_decimal_divisor = 10**subsec_decimal_digits # 1000000000 NS in 1 second + integer_part = int(timestamp / subsec_decimal_divisor) # Get seconds + sec = integer_part + # Conversion to decimal + fractional_part = round( + (timestamp % subsec_decimal_divisor) / subsec_decimal_divisor, subsec_decimal_digits + ) + subsec = round(fractional_part * (2**subsec_bits)) # Convert to 30 bit int, round + + ### Binary Conversions + ### Need subsec_a (12 bits), subsec_b (12-bits), and subsec_c (leftover bits starting subsec_seq_node) + unixts = f"{sec:036b}" + subsec_binary = f"{subsec:030b}" + subsec_a = subsec_binary[:12] # Upper 12 + subsec_b_c = subsec_binary[-18:] # Lower 18 + subsec_b = subsec_b_c[:12] # Upper 12 + subsec_c = subsec_binary[-6:] # Lower 6 + + ### Sequence Work + # Sequence starts at 0, increments if timestamp is the same, the sequence increments by 1 + # Resets if timestamp int is larger than _last_v7timestamp used for UUID generation + # Will be 8 bits for NS timestamp + if timestamp <= _last_v7timestamp: + sequenceCounter = int(sequenceCounter) + 1 + + if timestamp > _last_v7timestamp: + sequenceCounter = 0 + + sequenceCounterBin = f"{sequenceCounter:08b}" + + # Set these two before moving on + _last_v7timestamp = timestamp + + ### Random Node Work + randomInt = random.getrandbits(node_bits) + randomBinary = f"{randomInt:048b}" + + # Create subsec_seq_node + subsec_seq_node = subsec_c + sequenceCounterBin + randomBinary + + ### Formatting Work + # Bin merge and Int creation + UUIDv7_bin = unixts + subsec_a + uuidVersion + subsec_b + uuidVariant + subsec_seq_node + UUIDv7_int = int(UUIDv7_bin, 2) + + # Convert Hex to Int then splice in dashes + UUIDv7_hex = f"{UUIDv7_int:032x}" # int to hex + UUIDv7_formatted = "-".join( + [ + UUIDv7_hex[:8], + UUIDv7_hex[8:12], + UUIDv7_hex[12:16], + UUIDv7_hex[16:20], + UUIDv7_hex[20:32], + ] + ) + + return uuid.UUID(UUIDv7_formatted) + + +for i in dir(uuid): + setattr(scix_uuid, i, getattr(uuid, i)) From 8167266163c910acfdb431ca312667fcbffcf469 Mon Sep 17 00:00:00 2001 From: tjacovich Date: Wed, 1 Nov 2023 16:45:41 -0400 Subject: [PATCH 4/5] Added docstring. --- SciXPipelineUtils/scix_uuid.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/SciXPipelineUtils/scix_uuid.py b/SciXPipelineUtils/scix_uuid.py index d137566..8400f96 100644 --- a/SciXPipelineUtils/scix_uuid.py +++ b/SciXPipelineUtils/scix_uuid.py @@ -97,6 +97,9 @@ def uuid7(): return uuid.UUID(UUIDv7_formatted) - +""" +This loops through all the uuid attributes and adds them to the scix_uuid class so they are accessible +and scix_uuid can be treated as a drop-in replacement for uuid. +""" for i in dir(uuid): setattr(scix_uuid, i, getattr(uuid, i)) From 661825d2bc217985959517f2240c992876af50e3 Mon Sep 17 00:00:00 2001 From: tjacovich Date: Wed, 1 Nov 2023 17:04:06 -0400 Subject: [PATCH 5/5] Fixed error in test name. --- SciXPipelineUtils/scix_uuid.py | 29 ++++++++++++++++++++++------- tests/test_scix_uuid.py | 4 ++-- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/SciXPipelineUtils/scix_uuid.py b/SciXPipelineUtils/scix_uuid.py index 8400f96..2aacecf 100644 --- a/SciXPipelineUtils/scix_uuid.py +++ b/SciXPipelineUtils/scix_uuid.py @@ -7,6 +7,13 @@ import time import uuid +# These are needed to keep the semi-sequential nature of the UUIDs +sequenceCounter = 0 +_last_v7timestamp = 0 +_last_uuid_int = 0 +_last_sequence = None +uuidVariant = "10" + class scix_uuid: def uuid7(): @@ -16,13 +23,15 @@ def uuid7(): format: unixts|subsec_a|version|subsec_b|variant|subsec_seq_node - :return: uuid.UUID + :param returnType: bin, int, hex + :return: bin, int, hex """ - sequenceCounter = 0 - _last_v7timestamp = 0 - uuidVariant = "10" - + global _last_v7timestamp + global _last_uuid_int + global _last_sequence + global sequenceCounter + global uuidVariant uuidVersion = "0111" # ver 7 sec_bits = 36 # unixts at second precision subsec_bits = 30 # Enough to represent NS @@ -70,6 +79,7 @@ def uuid7(): # Set these two before moving on _last_v7timestamp = timestamp + _last_sequence = int(sequenceCounter) ### Random Node Work randomInt = random.getrandbits(node_bits) @@ -83,6 +93,8 @@ def uuid7(): UUIDv7_bin = unixts + subsec_a + uuidVersion + subsec_b + uuidVariant + subsec_seq_node UUIDv7_int = int(UUIDv7_bin, 2) + _last_uuid_int = UUIDv7_int + # Convert Hex to Int then splice in dashes UUIDv7_hex = f"{UUIDv7_int:032x}" # int to hex UUIDv7_formatted = "-".join( @@ -97,9 +109,12 @@ def uuid7(): return uuid.UUID(UUIDv7_formatted) + """ -This loops through all the uuid attributes and adds them to the scix_uuid class so they are accessible -and scix_uuid can be treated as a drop-in replacement for uuid. +Added so that we can treat scix_uuid as an approximate replacement for the uuid module +noting that it is now technically a class but does not need to be instantiated to operate. +This is done because flake8 and PEP8 strongly discourage using import * """ + for i in dir(uuid): setattr(scix_uuid, i, getattr(uuid, i)) diff --git a/tests/test_scix_uuid.py b/tests/test_scix_uuid.py index 8725bfb..10cf218 100644 --- a/tests/test_scix_uuid.py +++ b/tests/test_scix_uuid.py @@ -1,11 +1,11 @@ import uuid from unittest import TestCase -import scix_uuid +from scix_uuid import scix_uuid class TestSciXUUIDImplementation(TestCase): - def generate_uuid7(self): + def test_generate_uuid7(self): test_uuid = scix_uuid.uuid7() self.assertEqual(type(test_uuid), uuid.UUID) self.assertEqual(type(test_uuid.hex), str)