From 261a9d976d9922199650985891019206fa50d0fe Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Tue, 7 Jan 2025 20:36:34 -0500 Subject: [PATCH 01/18] Generate trace IDs as v7 UUIDs by default --- logfire/_internal/config.py | 5 +-- logfire/_internal/uuid7.py | 89 +++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 3 deletions(-) create mode 100644 logfire/_internal/uuid7.py diff --git a/logfire/_internal/config.py b/logfire/_internal/config.py index 604645345..e8cf5d898 100644 --- a/logfire/_internal/config.py +++ b/logfire/_internal/config.py @@ -53,6 +53,7 @@ from rich.prompt import Confirm, Prompt from typing_extensions import Self, Unpack +from logfire._internal.uuid7 import Uuidv7TraceIdGenerator from logfire.exceptions import LogfireConfigError from logfire.sampling import SamplingOptions from logfire.sampling._tail_sampling import TailSamplingProcessor @@ -762,9 +763,7 @@ def _initialize(self) -> None: else: sampler = head tracer_provider = SDKTracerProvider( - sampler=sampler, - resource=resource, - id_generator=self.advanced.id_generator, + sampler=sampler, resource=resource, id_generator=self.advanced.id_generator or Uuidv7TraceIdGenerator() ) self._tracer_provider.shutdown() diff --git a/logfire/_internal/uuid7.py b/logfire/_internal/uuid7.py new file mode 100644 index 000000000..cddd9e266 --- /dev/null +++ b/logfire/_internal/uuid7.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +import os +import time +from typing import TYPE_CHECKING +from uuid import UUID + +from opentelemetry.sdk.trace.id_generator import RandomIdGenerator + +_last_timestamp_v7 = None +_last_counter_v7 = 0 + +if TYPE_CHECKING: + + def uuid7() -> UUID: ... +else: + # Try to use the built-in uuid7 function if it exists (Python 3.14+) + # Or use the vendored implementation if it doesn't + try: + from uuid import uuid7 # type: ignore + except ImportError: + # vendored from https://github.com/python/cpython/pull/121119 + + def uuid7() -> UUID: + """Generate a UUID from a Unix timestamp in milliseconds and random bits. + + UUIDv7 objects feature monotonicity within a millisecond. + """ + # --- 48 --- -- 4 -- --- 12 --- -- 2 -- --- 30 --- - 32 - + # unix_ts_ms | version | counter_hi | variant | counter_lo | random + # + # 'counter = counter_hi | counter_lo' is a 42-bit counter constructed + # with Method 1 of RFC 9562, §6.2, and its MSB is set to 0. + # + # 'random' is a 32-bit random value regenerated for every new UUID. + # + # If multiple UUIDs are generated within the same millisecond, the LSB + # of 'counter' is incremented by 1. When overflowing, the timestamp is + # advanced and the counter is reset to a random 42-bit integer with MSB + # set to 0. + + def get_counter_and_tail(): + rand = int.from_bytes(os.urandom(10)) + # 42-bit counter with MSB set to 0 + counter = (rand >> 32) & 0x1FF_FFFF_FFFF + # 32-bit random data + tail = rand & 0xFFFF_FFFF + return counter, tail + + global _last_timestamp_v7 + global _last_counter_v7 + + nanoseconds = time.time_ns() + timestamp_ms, _ = divmod(nanoseconds, 1_000_000) + + if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7: + counter, tail = get_counter_and_tail() + else: + if timestamp_ms < _last_timestamp_v7: + timestamp_ms = _last_timestamp_v7 + 1 + # advance the 42-bit counter + counter = _last_counter_v7 + 1 + if counter > 0x3FF_FFFF_FFFF: + timestamp_ms += 1 # advance the 48-bit timestamp + counter, tail = get_counter_and_tail() + else: + tail = int.from_bytes(os.urandom(4)) + + _last_timestamp_v7 = timestamp_ms + _last_counter_v7 = counter + + int_uuid_7 = (timestamp_ms & 0xFFFF_FFFF_FFFF) << 80 + int_uuid_7 |= ((counter >> 30) & 0xFFF) << 64 + int_uuid_7 |= (counter & 0x3FFF_FFFF) << 32 + int_uuid_7 |= tail & 0xFFFF_FFFF + return UUID(int=int_uuid_7, version=7) + + +class Uuidv7TraceIdGenerator(RandomIdGenerator): + """The default ID generator for Logfire. + + Trace IDs are generated using UUIDv7, which have a timestamp and a counter prefix making + them unique and sortable. + Span IDs are generated using pure randomness. + """ + + def generate_trace_id(self) -> int: + id = uuid7() + return id.int From ebe95cbcc56d2bd7fb38d67ca37d89f0a922ca9b Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Tue, 7 Jan 2025 20:37:21 -0500 Subject: [PATCH 02/18] fmt --- logfire/_internal/config.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/logfire/_internal/config.py b/logfire/_internal/config.py index e8cf5d898..1a780279b 100644 --- a/logfire/_internal/config.py +++ b/logfire/_internal/config.py @@ -763,7 +763,9 @@ def _initialize(self) -> None: else: sampler = head tracer_provider = SDKTracerProvider( - sampler=sampler, resource=resource, id_generator=self.advanced.id_generator or Uuidv7TraceIdGenerator() + sampler=sampler, + resource=resource, + id_generator=self.advanced.id_generator or Uuidv7TraceIdGenerator(), ) self._tracer_provider.shutdown() From 25c1d97991a9157dc65adf917c3198b9a20f39c9 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Tue, 7 Jan 2025 20:41:59 -0500 Subject: [PATCH 03/18] coverage --- logfire/_internal/uuid7.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logfire/_internal/uuid7.py b/logfire/_internal/uuid7.py index cddd9e266..fc284b6ea 100644 --- a/logfire/_internal/uuid7.py +++ b/logfire/_internal/uuid7.py @@ -21,7 +21,7 @@ def uuid7() -> UUID: ... except ImportError: # vendored from https://github.com/python/cpython/pull/121119 - def uuid7() -> UUID: + def uuid7() -> UUID: # pragma: no cover """Generate a UUID from a Unix timestamp in milliseconds and random bits. UUIDv7 objects feature monotonicity within a millisecond. From dd7d3a566f8bb92f19d8e5f86f044b17ffc71a3d Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Tue, 7 Jan 2025 21:03:30 -0500 Subject: [PATCH 04/18] fix --- logfire/_internal/config.py | 3 +- logfire/_internal/utils.py | 5 +- logfire/_internal/uuid7.py | 135 ++++++++++++++++-------------------- tests/conftest.py | 3 +- tests/test_logfire.py | 25 ++++++- 5 files changed, 88 insertions(+), 83 deletions(-) diff --git a/logfire/_internal/config.py b/logfire/_internal/config.py index 1a780279b..604645345 100644 --- a/logfire/_internal/config.py +++ b/logfire/_internal/config.py @@ -53,7 +53,6 @@ from rich.prompt import Confirm, Prompt from typing_extensions import Self, Unpack -from logfire._internal.uuid7 import Uuidv7TraceIdGenerator from logfire.exceptions import LogfireConfigError from logfire.sampling import SamplingOptions from logfire.sampling._tail_sampling import TailSamplingProcessor @@ -765,7 +764,7 @@ def _initialize(self) -> None: tracer_provider = SDKTracerProvider( sampler=sampler, resource=resource, - id_generator=self.advanced.id_generator or Uuidv7TraceIdGenerator(), + id_generator=self.advanced.id_generator, ) self._tracer_provider.shutdown() diff --git a/logfire/_internal/utils.py b/logfire/_internal/utils.py index 649f9a756..6ad473643 100644 --- a/logfire/_internal/utils.py +++ b/logfire/_internal/utils.py @@ -22,6 +22,7 @@ from requests import RequestException, Response from logfire._internal.stack_info import is_user_code +from logfire._internal.uuid7 import uuid7 if TYPE_CHECKING: from packaging.version import Version @@ -384,7 +385,7 @@ def generate_span_id(self) -> int: return span_id def generate_trace_id(self) -> int: - trace_id = self.random.getrandbits(128) + trace_id = uuid7() while trace_id == trace_api.INVALID_TRACE_ID: # pragma: no cover - trace_id = self.random.getrandbits(128) + trace_id = uuid7() return trace_id diff --git a/logfire/_internal/uuid7.py b/logfire/_internal/uuid7.py index fc284b6ea..e3455193f 100644 --- a/logfire/_internal/uuid7.py +++ b/logfire/_internal/uuid7.py @@ -1,89 +1,70 @@ from __future__ import annotations -import os import time -from typing import TYPE_CHECKING -from uuid import UUID - -from opentelemetry.sdk.trace.id_generator import RandomIdGenerator +from random import Random _last_timestamp_v7 = None _last_counter_v7 = 0 +_random = Random() -if TYPE_CHECKING: - - def uuid7() -> UUID: ... -else: - # Try to use the built-in uuid7 function if it exists (Python 3.14+) - # Or use the vendored implementation if it doesn't - try: - from uuid import uuid7 # type: ignore - except ImportError: - # vendored from https://github.com/python/cpython/pull/121119 - - def uuid7() -> UUID: # pragma: no cover - """Generate a UUID from a Unix timestamp in milliseconds and random bits. - - UUIDv7 objects feature monotonicity within a millisecond. - """ - # --- 48 --- -- 4 -- --- 12 --- -- 2 -- --- 30 --- - 32 - - # unix_ts_ms | version | counter_hi | variant | counter_lo | random - # - # 'counter = counter_hi | counter_lo' is a 42-bit counter constructed - # with Method 1 of RFC 9562, §6.2, and its MSB is set to 0. - # - # 'random' is a 32-bit random value regenerated for every new UUID. - # - # If multiple UUIDs are generated within the same millisecond, the LSB - # of 'counter' is incremented by 1. When overflowing, the timestamp is - # advanced and the counter is reset to a random 42-bit integer with MSB - # set to 0. - - def get_counter_and_tail(): - rand = int.from_bytes(os.urandom(10)) - # 42-bit counter with MSB set to 0 - counter = (rand >> 32) & 0x1FF_FFFF_FFFF - # 32-bit random data - tail = rand & 0xFFFF_FFFF - return counter, tail - global _last_timestamp_v7 - global _last_counter_v7 +def uuid7(random: Random = _random) -> int: # pragma: no cover + """Generate a UUID from a Unix timestamp in milliseconds and random bits. - nanoseconds = time.time_ns() - timestamp_ms, _ = divmod(nanoseconds, 1_000_000) + UUIDv7 objects feature monotonicity within a millisecond. - if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7: - counter, tail = get_counter_and_tail() - else: - if timestamp_ms < _last_timestamp_v7: - timestamp_ms = _last_timestamp_v7 + 1 - # advance the 42-bit counter - counter = _last_counter_v7 + 1 - if counter > 0x3FF_FFFF_FFFF: - timestamp_ms += 1 # advance the 48-bit timestamp - counter, tail = get_counter_and_tail() - else: - tail = int.from_bytes(os.urandom(4)) + Vendored from https://github.com/python/cpython/pull/121119 + w/ addition of `random` argument and returning an integer instead of a UUID object because: - _last_timestamp_v7 = timestamp_ms - _last_counter_v7 = counter - - int_uuid_7 = (timestamp_ms & 0xFFFF_FFFF_FFFF) << 80 - int_uuid_7 |= ((counter >> 30) & 0xFFF) << 64 - int_uuid_7 |= (counter & 0x3FFF_FFFF) << 32 - int_uuid_7 |= tail & 0xFFFF_FFFF - return UUID(int=int_uuid_7, version=7) - - -class Uuidv7TraceIdGenerator(RandomIdGenerator): - """The default ID generator for Logfire. - - Trace IDs are generated using UUIDv7, which have a timestamp and a counter prefix making - them unique and sortable. - Span IDs are generated using pure randomness. + 1. We end up wanting an integer anyway. + 2. The UUID object checks the version and variant, which currently rejects v7 UUIDs. """ - - def generate_trace_id(self) -> int: - id = uuid7() - return id.int + # --- 48 --- -- 4 -- --- 12 --- -- 2 -- --- 30 --- - 32 - + # unix_ts_ms | version | counter_hi | variant | counter_lo | random + # + # 'counter = counter_hi | counter_lo' is a 42-bit counter constructed + # with Method 1 of RFC 9562, §6.2, and its MSB is set to 0. + # + # 'random' is a 32-bit random value regenerated for every new UUID. + # + # If multiple UUIDs are generated within the same millisecond, the LSB + # of 'counter' is incremented by 1. When overflowing, the timestamp is + # advanced and the counter is reset to a random 42-bit integer with MSB + # set to 0. + + def get_counter_and_tail(): + rand = random.getrandbits(10) + # 42-bit counter with MSB set to 0 + counter = (rand >> 32) & 0x1FF_FFFF_FFFF + # 32-bit random data + tail = rand & 0xFFFF_FFFF + return counter, tail + + global _last_timestamp_v7 + global _last_counter_v7 + + nanoseconds = time.time_ns() + timestamp_ms, _ = divmod(nanoseconds, 1_000_000) + + if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7: + counter, tail = get_counter_and_tail() + else: + if timestamp_ms < _last_timestamp_v7: + timestamp_ms = _last_timestamp_v7 + 1 + # advance the 42-bit counter + counter = _last_counter_v7 + 1 + if counter > 0x3FF_FFFF_FFFF: + timestamp_ms += 1 # advance the 48-bit timestamp + counter, tail = get_counter_and_tail() + else: + tail = int.from_bytes(random.randbytes(4)) + + _last_timestamp_v7 = timestamp_ms + _last_counter_v7 = counter + + int_uuid_7 = (timestamp_ms & 0xFFFF_FFFF_FFFF) << 80 + int_uuid_7 |= ((counter >> 30) & 0xFFF) << 64 + int_uuid_7 |= (counter & 0x3FFF_FFFF) << 32 + int_uuid_7 |= tail & 0xFFFF_FFFF + + return int_uuid_7 diff --git a/tests/conftest.py b/tests/conftest.py index b80391be5..dc1520230 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,6 +11,7 @@ from opentelemetry import trace from opentelemetry.sdk.metrics.export import InMemoryMetricReader from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.id_generator import IdGenerator import logfire from logfire import configure @@ -55,7 +56,7 @@ def metrics_reader() -> InMemoryMetricReader: @pytest.fixture def config_kwargs( exporter: TestExporter, - id_generator: IncrementalIdGenerator, + id_generator: IdGenerator, time_generator: TimeGenerator, ) -> dict[str, Any]: """ diff --git a/tests/test_logfire.py b/tests/test_logfire.py index db8de439f..79d66d9fa 100644 --- a/tests/test_logfire.py +++ b/tests/test_logfire.py @@ -4,7 +4,7 @@ import re import sys from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor -from contextlib import asynccontextmanager, contextmanager +from contextlib import ExitStack, asynccontextmanager, contextmanager from dataclasses import dataclass from functools import partial from logging import getLogger @@ -37,8 +37,12 @@ ) from logfire._internal.formatter import FormattingFailedWarning, InspectArgumentsFailedWarning from logfire._internal.main import NoopSpan +<<<<<<< HEAD from logfire._internal.tracer import record_exception from logfire._internal.utils import is_instrumentation_suppressed +======= +from logfire._internal.utils import SeededRandomIdGenerator, is_instrumentation_suppressed +>>>>>>> 2327918 (fix) from logfire.integrations.logging import LogfireLoggingHandler from logfire.testing import TestExporter from tests.test_metrics import get_collected_metrics @@ -3213,3 +3217,22 @@ def test_exit_ended_span(exporter: TestExporter): } ] ) + + +@pytest.mark.parametrize( + 'id_generator', + [SeededRandomIdGenerator()], +) +def test_default_id_generator(exporter: TestExporter) -> None: + """Test that SeededRandomIdGenerator generates trace and span ids without errors.""" + with ExitStack() as stack: + for i in range(1024): + for j in range(8): + stack.enter_context(logfire.span(f'span {i}:{j}')) + + exported = exporter.exported_spans_as_dict() + + # check that trace ids are sortable and unique + sorted_by_trace_id = [export['name'] for export in sorted(exported, key=lambda span: span['context']['trace_id'])] + sorted_by_start_timestamp = [export['name'] for export in sorted(exported, key=lambda span: span['start_time'])] + assert sorted_by_trace_id == sorted_by_start_timestamp From 296730f6df710bf952b0f9f2bd22fc28a8d4452b Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Tue, 7 Jan 2025 21:05:18 -0500 Subject: [PATCH 05/18] tweak docstring --- logfire/_internal/uuid7.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/logfire/_internal/uuid7.py b/logfire/_internal/uuid7.py index e3455193f..1d57d676d 100644 --- a/logfire/_internal/uuid7.py +++ b/logfire/_internal/uuid7.py @@ -13,11 +13,13 @@ def uuid7(random: Random = _random) -> int: # pragma: no cover UUIDv7 objects feature monotonicity within a millisecond. - Vendored from https://github.com/python/cpython/pull/121119 - w/ addition of `random` argument and returning an integer instead of a UUID object because: + Vendored from https://github.com/python/cpython/pull/121119 w/ minor changes: - 1. We end up wanting an integer anyway. - 2. The UUID object checks the version and variant, which currently rejects v7 UUIDs. + 1. Added a `random` argument to allow for seeding. + 2. Return an integer instead of a UUID object because: + a. We need an integer anyway and would have just converted the UUID object to an integer + (thus it's actually faster to do it this way than it will be in CPython!). + b. The UUID object checks the version and variant, which currently rejects v7 UUIDs. """ # --- 48 --- -- 4 -- --- 12 --- -- 2 -- --- 30 --- - 32 - # unix_ts_ms | version | counter_hi | variant | counter_lo | random From 94a063415670cdc92f764f2f5275662b011bfad2 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Tue, 7 Jan 2025 23:42:14 -0500 Subject: [PATCH 06/18] fix merge --- tests/test_logfire.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_logfire.py b/tests/test_logfire.py index 79d66d9fa..1cbfdc472 100644 --- a/tests/test_logfire.py +++ b/tests/test_logfire.py @@ -37,12 +37,8 @@ ) from logfire._internal.formatter import FormattingFailedWarning, InspectArgumentsFailedWarning from logfire._internal.main import NoopSpan -<<<<<<< HEAD from logfire._internal.tracer import record_exception -from logfire._internal.utils import is_instrumentation_suppressed -======= from logfire._internal.utils import SeededRandomIdGenerator, is_instrumentation_suppressed ->>>>>>> 2327918 (fix) from logfire.integrations.logging import LogfireLoggingHandler from logfire.testing import TestExporter from tests.test_metrics import get_collected_metrics From 3db7be3727fb95a3867cfd9a405b65c262323151 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Tue, 7 Jan 2025 23:51:29 -0500 Subject: [PATCH 07/18] fix python < 3.9 --- logfire/_internal/uuid7.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logfire/_internal/uuid7.py b/logfire/_internal/uuid7.py index 1d57d676d..eb3713aa4 100644 --- a/logfire/_internal/uuid7.py +++ b/logfire/_internal/uuid7.py @@ -59,7 +59,7 @@ def get_counter_and_tail(): timestamp_ms += 1 # advance the 48-bit timestamp counter, tail = get_counter_and_tail() else: - tail = int.from_bytes(random.randbytes(4)) + tail = int.from_bytes(random.getrandbits(4 * 8).to_bytes(4, 'little')) _last_timestamp_v7 = timestamp_ms _last_counter_v7 = counter From e5ba40803fc5cf72c16c663aa8f771ac4871f80f Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 8 Jan 2025 07:59:19 -0500 Subject: [PATCH 08/18] use ulid --- logfire/_internal/utils.py | 6 ++-- logfire/_internal/uuid7.py | 72 -------------------------------------- tests/test_sampling.py | 2 +- 3 files changed, 4 insertions(+), 76 deletions(-) delete mode 100644 logfire/_internal/uuid7.py diff --git a/logfire/_internal/utils.py b/logfire/_internal/utils.py index 6ad473643..43b9889f9 100644 --- a/logfire/_internal/utils.py +++ b/logfire/_internal/utils.py @@ -22,7 +22,7 @@ from requests import RequestException, Response from logfire._internal.stack_info import is_user_code -from logfire._internal.uuid7 import uuid7 +from logfire._internal.ulid import ulid if TYPE_CHECKING: from packaging.version import Version @@ -385,7 +385,7 @@ def generate_span_id(self) -> int: return span_id def generate_trace_id(self) -> int: - trace_id = uuid7() + trace_id = ulid() while trace_id == trace_api.INVALID_TRACE_ID: # pragma: no cover - trace_id = uuid7() + trace_id = ulid() return trace_id diff --git a/logfire/_internal/uuid7.py b/logfire/_internal/uuid7.py deleted file mode 100644 index eb3713aa4..000000000 --- a/logfire/_internal/uuid7.py +++ /dev/null @@ -1,72 +0,0 @@ -from __future__ import annotations - -import time -from random import Random - -_last_timestamp_v7 = None -_last_counter_v7 = 0 -_random = Random() - - -def uuid7(random: Random = _random) -> int: # pragma: no cover - """Generate a UUID from a Unix timestamp in milliseconds and random bits. - - UUIDv7 objects feature monotonicity within a millisecond. - - Vendored from https://github.com/python/cpython/pull/121119 w/ minor changes: - - 1. Added a `random` argument to allow for seeding. - 2. Return an integer instead of a UUID object because: - a. We need an integer anyway and would have just converted the UUID object to an integer - (thus it's actually faster to do it this way than it will be in CPython!). - b. The UUID object checks the version and variant, which currently rejects v7 UUIDs. - """ - # --- 48 --- -- 4 -- --- 12 --- -- 2 -- --- 30 --- - 32 - - # unix_ts_ms | version | counter_hi | variant | counter_lo | random - # - # 'counter = counter_hi | counter_lo' is a 42-bit counter constructed - # with Method 1 of RFC 9562, §6.2, and its MSB is set to 0. - # - # 'random' is a 32-bit random value regenerated for every new UUID. - # - # If multiple UUIDs are generated within the same millisecond, the LSB - # of 'counter' is incremented by 1. When overflowing, the timestamp is - # advanced and the counter is reset to a random 42-bit integer with MSB - # set to 0. - - def get_counter_and_tail(): - rand = random.getrandbits(10) - # 42-bit counter with MSB set to 0 - counter = (rand >> 32) & 0x1FF_FFFF_FFFF - # 32-bit random data - tail = rand & 0xFFFF_FFFF - return counter, tail - - global _last_timestamp_v7 - global _last_counter_v7 - - nanoseconds = time.time_ns() - timestamp_ms, _ = divmod(nanoseconds, 1_000_000) - - if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7: - counter, tail = get_counter_and_tail() - else: - if timestamp_ms < _last_timestamp_v7: - timestamp_ms = _last_timestamp_v7 + 1 - # advance the 42-bit counter - counter = _last_counter_v7 + 1 - if counter > 0x3FF_FFFF_FFFF: - timestamp_ms += 1 # advance the 48-bit timestamp - counter, tail = get_counter_and_tail() - else: - tail = int.from_bytes(random.getrandbits(4 * 8).to_bytes(4, 'little')) - - _last_timestamp_v7 = timestamp_ms - _last_counter_v7 = counter - - int_uuid_7 = (timestamp_ms & 0xFFFF_FFFF_FFFF) << 80 - int_uuid_7 |= ((counter >> 30) & 0xFFF) << 64 - int_uuid_7 |= (counter & 0x3FFF_FFFF) << 32 - int_uuid_7 |= tail & 0xFFFF_FFFF - - return int_uuid_7 diff --git a/tests/test_sampling.py b/tests/test_sampling.py index 37fe07680..da684c3fa 100644 --- a/tests/test_sampling.py +++ b/tests/test_sampling.py @@ -70,7 +70,7 @@ def test_sample_rate_config(exporter: TestExporter, config_kwargs: dict[str, Any # 1000 iterations of 2 spans -> 2000 spans # 30% sampling -> 600 spans (approximately) - assert len(exporter.exported_spans_as_dict()) == 634 + assert len(exporter.exported_spans_as_dict()) == 616 @pytest.mark.skipif( From 861b568920c77d1d753a76635625fdb0ff5dca30 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 8 Jan 2025 08:25:50 -0500 Subject: [PATCH 09/18] use ulids --- logfire/_internal/ulid.py | 42 ++++++++++++++++++++++++++++++++++++++ logfire/_internal/utils.py | 4 ++-- 2 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 logfire/_internal/ulid.py diff --git a/logfire/_internal/ulid.py b/logfire/_internal/ulid.py new file mode 100644 index 000000000..24ac0de1a --- /dev/null +++ b/logfire/_internal/ulid.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import time +from random import Random + +_random = Random() + + +def ulid(random: Random = _random) -> int: + """Generate an integer ULID compatible with UUID v4. + + ULIDs as defined by the [spec](https://github.com/ulid/spec) look like this: + + 01AN4Z07BY 79KA1307SR9X4MV3 + |----------| |----------------| + Timestamp Randomness + 48bits 80bits + + In the future it would be nice to make this compatible with a UUID, + e.g. v4 UUIDs by setting the version and variant bits correctly. + We can't currently do this because setting these bits would leave us with only 7 bytes of randomness, + which isn't enough for the Python SDK's sampler that currently expects 8 bytes of randomness. + In the future OTEL will probably adopt https://www.w3.org/TR/trace-context-2/#random-trace-id-flag + which relies only on the lower 7 bytes of the trace ID, then all SDKs and tooling should be updated + and leaving only 7 bytes of randomness should be fine. + + Right now we only care about: + - Our SDK / Python SDK's in general. + - The OTEL collector. + + And both behave properly with 8 bytes of randomness because trace IDs were originally 64 bits + so to be compatible with old trace IDs nothing in OTEL can assume >8 bytes of randomness in trace IDs + unless they generated the trace ID themselves (e.g. the Go SDK _does_ expect >8 bytes of randomness internally). + """ + # Timestamp: first 6 bytes of the ULID (48 bits) + # Note that it's not important that this timestamp is super precise or unique. + # It just needs to be monotonically increasing so that the ULID is sortable, at least for our purposes. + timestamp = int(time.time() * 1000).to_bytes(6, byteorder='big') + # Randomness: next 10 bytes of the ULID (80 bits) + randomness = bytearray(random.getrandbits(8) for _ in range(10)) + # Convert to int and return + return int.from_bytes(timestamp + randomness, byteorder='big') diff --git a/logfire/_internal/utils.py b/logfire/_internal/utils.py index 43b9889f9..e59d86abc 100644 --- a/logfire/_internal/utils.py +++ b/logfire/_internal/utils.py @@ -385,7 +385,7 @@ def generate_span_id(self) -> int: return span_id def generate_trace_id(self) -> int: - trace_id = ulid() + trace_id = ulid(self.random) while trace_id == trace_api.INVALID_TRACE_ID: # pragma: no cover - trace_id = ulid() + trace_id = ulid(self.random) return trace_id From 8216fed165af04c1be57eb2bfa9a3e6e23fc18a8 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 8 Jan 2025 08:39:08 -0500 Subject: [PATCH 10/18] fix --- tests/test_logfire.py | 3 ++- tests/test_sampling.py | 2 +- tests/test_tail_sampling.py | 12 ++++++------ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/test_logfire.py b/tests/test_logfire.py index 1cbfdc472..90426f955 100644 --- a/tests/test_logfire.py +++ b/tests/test_logfire.py @@ -3228,7 +3228,8 @@ def test_default_id_generator(exporter: TestExporter) -> None: exported = exporter.exported_spans_as_dict() - # check that trace ids are sortable and unique + # Check that trace ids are sortable and unique + # We use ULIDs to generate trace ids, so they should be sortable. sorted_by_trace_id = [export['name'] for export in sorted(exported, key=lambda span: span['context']['trace_id'])] sorted_by_start_timestamp = [export['name'] for export in sorted(exported, key=lambda span: span['start_time'])] assert sorted_by_trace_id == sorted_by_start_timestamp diff --git a/tests/test_sampling.py b/tests/test_sampling.py index da684c3fa..2a6f16478 100644 --- a/tests/test_sampling.py +++ b/tests/test_sampling.py @@ -70,7 +70,7 @@ def test_sample_rate_config(exporter: TestExporter, config_kwargs: dict[str, Any # 1000 iterations of 2 spans -> 2000 spans # 30% sampling -> 600 spans (approximately) - assert len(exporter.exported_spans_as_dict()) == 616 + assert len(exporter.exported_spans_as_dict()) == 570, len(exporter.exported_spans_as_dict()) @pytest.mark.skipif( diff --git a/tests/test_tail_sampling.py b/tests/test_tail_sampling.py index a57c1f689..90645c418 100644 --- a/tests/test_tail_sampling.py +++ b/tests/test_tail_sampling.py @@ -345,7 +345,7 @@ def test_background_rate(config_kwargs: dict[str, Any], exporter: TestExporter): # None of them meet the tail sampling criteria. for _ in range(1000): logfire.info('info') - assert len(exporter.exported_spans) == 100 + 321 + assert len(exporter.exported_spans) == 100 + 310, len(exporter.exported_spans) class TestSampler(Sampler): @@ -406,7 +406,7 @@ def test_raw_head_sampler_with_tail_sampling(config_kwargs: dict[str, Any], expo # None of them meet the tail sampling criteria. for _ in range(1000): logfire.info('info') - assert len(exporter.exported_spans) == 100 + 315 + assert len(exporter.exported_spans) == 100 + 316, len(exporter.exported_spans) def test_custom_head_and_tail(config_kwargs: dict[str, Any], exporter: TestExporter): @@ -432,20 +432,20 @@ def get_tail_sample_rate(span_info: TailSamplingSpanInfo) -> float: for _ in range(1000): logfire.warn('warn') - assert span_counts == snapshot({'start': 720, 'end': 617}) - assert len(exporter.exported_spans) == snapshot(103) + assert span_counts == snapshot({'start': 710, 'end': 610}) + assert len(exporter.exported_spans) == snapshot(100) assert span_counts['end'] + len(exporter.exported_spans) == span_counts['start'] exporter.clear() for _ in range(1000): with logfire.span('span'): pass - assert len(exporter.exported_spans_as_dict()) == snapshot(505) + assert len(exporter.exported_spans_as_dict()) == snapshot(502) exporter.clear() for _ in range(1000): logfire.error('error') - assert len(exporter.exported_spans) == snapshot(282) + assert len(exporter.exported_spans) == snapshot(288) def test_span_levels(): From 47e1aaf2a4a97994a9975ad2776c46069bb54ceb Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 8 Jan 2025 09:16:34 -0500 Subject: [PATCH 11/18] simplify --- logfire/_internal/ulid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logfire/_internal/ulid.py b/logfire/_internal/ulid.py index 24ac0de1a..a1a0355f4 100644 --- a/logfire/_internal/ulid.py +++ b/logfire/_internal/ulid.py @@ -37,6 +37,6 @@ def ulid(random: Random = _random) -> int: # It just needs to be monotonically increasing so that the ULID is sortable, at least for our purposes. timestamp = int(time.time() * 1000).to_bytes(6, byteorder='big') # Randomness: next 10 bytes of the ULID (80 bits) - randomness = bytearray(random.getrandbits(8) for _ in range(10)) + randomness = random.getrandbits(80).to_bytes(10, byteorder='big') # Convert to int and return return int.from_bytes(timestamp + randomness, byteorder='big') From 3ad769d29d9afea633f626ee4ff0322b4ef2da84 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 8 Jan 2025 10:32:11 -0500 Subject: [PATCH 12/18] fix --- tests/test_sampling.py | 2 +- tests/test_tail_sampling.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_sampling.py b/tests/test_sampling.py index 2a6f16478..be57c0758 100644 --- a/tests/test_sampling.py +++ b/tests/test_sampling.py @@ -70,7 +70,7 @@ def test_sample_rate_config(exporter: TestExporter, config_kwargs: dict[str, Any # 1000 iterations of 2 spans -> 2000 spans # 30% sampling -> 600 spans (approximately) - assert len(exporter.exported_spans_as_dict()) == 570, len(exporter.exported_spans_as_dict()) + assert len(exporter.exported_spans_as_dict()) == 588, len(exporter.exported_spans_as_dict()) @pytest.mark.skipif( diff --git a/tests/test_tail_sampling.py b/tests/test_tail_sampling.py index 90645c418..a24b0d5bc 100644 --- a/tests/test_tail_sampling.py +++ b/tests/test_tail_sampling.py @@ -345,7 +345,7 @@ def test_background_rate(config_kwargs: dict[str, Any], exporter: TestExporter): # None of them meet the tail sampling criteria. for _ in range(1000): logfire.info('info') - assert len(exporter.exported_spans) == 100 + 310, len(exporter.exported_spans) + assert len(exporter.exported_spans) == 100 + 299, len(exporter.exported_spans) class TestSampler(Sampler): @@ -406,7 +406,7 @@ def test_raw_head_sampler_with_tail_sampling(config_kwargs: dict[str, Any], expo # None of them meet the tail sampling criteria. for _ in range(1000): logfire.info('info') - assert len(exporter.exported_spans) == 100 + 316, len(exporter.exported_spans) + assert len(exporter.exported_spans) == 100 + 293, len(exporter.exported_spans) def test_custom_head_and_tail(config_kwargs: dict[str, Any], exporter: TestExporter): @@ -432,20 +432,20 @@ def get_tail_sample_rate(span_info: TailSamplingSpanInfo) -> float: for _ in range(1000): logfire.warn('warn') - assert span_counts == snapshot({'start': 710, 'end': 610}) - assert len(exporter.exported_spans) == snapshot(100) + assert span_counts == snapshot({'start': 719, 'end': 611}) + assert len(exporter.exported_spans) == snapshot(108) assert span_counts['end'] + len(exporter.exported_spans) == span_counts['start'] exporter.clear() for _ in range(1000): with logfire.span('span'): pass - assert len(exporter.exported_spans_as_dict()) == snapshot(502) + assert len(exporter.exported_spans_as_dict()) == snapshot(511) exporter.clear() for _ in range(1000): logfire.error('error') - assert len(exporter.exported_spans) == snapshot(288) + assert len(exporter.exported_spans) == snapshot(298) def test_span_levels(): From 60e36aaffaad462e7f846ac05f62d9e0cc0de5a9 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 8 Jan 2025 11:02:22 -0500 Subject: [PATCH 13/18] better test --- tests/test_logfire.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tests/test_logfire.py b/tests/test_logfire.py index 90426f955..2e42bbfd8 100644 --- a/tests/test_logfire.py +++ b/tests/test_logfire.py @@ -8,6 +8,7 @@ from dataclasses import dataclass from functools import partial from logging import getLogger +from time import sleep from typing import Any, Callable from unittest.mock import patch @@ -3221,15 +3222,28 @@ def test_exit_ended_span(exporter: TestExporter): ) def test_default_id_generator(exporter: TestExporter) -> None: """Test that SeededRandomIdGenerator generates trace and span ids without errors.""" - with ExitStack() as stack: - for i in range(1024): + for i in range(32): + with ExitStack() as stack: for j in range(8): stack.enter_context(logfire.span(f'span {i}:{j}')) + # make sure we get a mix of traces spread across ms + # test should take < 32 * 8 ms (0.25s) to run + sleep(1 / 1000 / 5) exported = exporter.exported_spans_as_dict() + # sanity check: there are 32 trace ids + assert len({export['context']['trace_id'] for export in exported}) == 32 + # sanity check: there are multiple milliseconds (first 6 bytes) + assert len({export['context']['trace_id'] >> 64 for export in exported}) >= 5 # 5 is an arbitrary number + # Check that trace ids are sortable and unique # We use ULIDs to generate trace ids, so they should be sortable. - sorted_by_trace_id = [export['name'] for export in sorted(exported, key=lambda span: span['context']['trace_id'])] + sorted_by_trace_id = [ + export['name'] + # sort by trace_id and start_time so that if two trace ids were generated in the same ms and thus may sort randomly + # we disambiguate with the start time + for export in sorted(exported, key=lambda span: (span['context']['trace_id'], span['start_time'])) + ] sorted_by_start_timestamp = [export['name'] for export in sorted(exported, key=lambda span: span['start_time'])] assert sorted_by_trace_id == sorted_by_start_timestamp From df089753fd725ec98902d090a2022b5e10e650db Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 8 Jan 2025 10:26:30 -0600 Subject: [PATCH 14/18] Update logfire/_internal/ulid.py Co-authored-by: Alex Hall --- logfire/_internal/ulid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logfire/_internal/ulid.py b/logfire/_internal/ulid.py index a1a0355f4..1ebdc2126 100644 --- a/logfire/_internal/ulid.py +++ b/logfire/_internal/ulid.py @@ -34,7 +34,7 @@ def ulid(random: Random = _random) -> int: """ # Timestamp: first 6 bytes of the ULID (48 bits) # Note that it's not important that this timestamp is super precise or unique. - # It just needs to be monotonically increasing so that the ULID is sortable, at least for our purposes. + # It just needs to be roughly monotonically increasing so that the ULID is sortable, at least for our purposes. timestamp = int(time.time() * 1000).to_bytes(6, byteorder='big') # Randomness: next 10 bytes of the ULID (80 bits) randomness = random.getrandbits(80).to_bytes(10, byteorder='big') From ab09e2d25e4e29a02001c71d91b2c4b232779a21 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 8 Jan 2025 11:43:25 -0500 Subject: [PATCH 15/18] pr feedback --- logfire/_internal/ulid.py | 8 +++----- logfire/_internal/utils.py | 12 +++++++++--- tests/test_logfire.py | 25 +++++++++++++++---------- tests/test_tail_sampling.py | 4 ++-- 4 files changed, 29 insertions(+), 20 deletions(-) diff --git a/logfire/_internal/ulid.py b/logfire/_internal/ulid.py index 1ebdc2126..92c0b0533 100644 --- a/logfire/_internal/ulid.py +++ b/logfire/_internal/ulid.py @@ -1,12 +1,10 @@ from __future__ import annotations -import time from random import Random +from typing import Callable -_random = Random() - -def ulid(random: Random = _random) -> int: +def ulid(random: Random, ms_ts_genertor: Callable[[], int]) -> int: """Generate an integer ULID compatible with UUID v4. ULIDs as defined by the [spec](https://github.com/ulid/spec) look like this: @@ -35,7 +33,7 @@ def ulid(random: Random = _random) -> int: # Timestamp: first 6 bytes of the ULID (48 bits) # Note that it's not important that this timestamp is super precise or unique. # It just needs to be roughly monotonically increasing so that the ULID is sortable, at least for our purposes. - timestamp = int(time.time() * 1000).to_bytes(6, byteorder='big') + timestamp = ms_ts_genertor().to_bytes(6, byteorder='big') # Randomness: next 10 bytes of the ULID (80 bits) randomness = random.getrandbits(80).to_bytes(10, byteorder='big') # Convert to int and return diff --git a/logfire/_internal/utils.py b/logfire/_internal/utils.py index e59d86abc..0591b218c 100644 --- a/logfire/_internal/utils.py +++ b/logfire/_internal/utils.py @@ -9,8 +9,9 @@ from contextlib import contextmanager from dataclasses import dataclass from pathlib import Path +from time import time from types import TracebackType -from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Sequence, Tuple, TypedDict, TypeVar, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Mapping, Sequence, Tuple, TypedDict, TypeVar, Union from opentelemetry import context, trace as trace_api from opentelemetry.sdk.resources import Resource @@ -359,6 +360,10 @@ def is_asgi_send_receive_span_name(name: str) -> bool: return name.endswith((' http send', ' http receive', ' websocket send', ' websocket receive')) +def _ms_ts_generator() -> int: + return int(time() * 1000) + + @dataclass(repr=True) class SeededRandomIdGenerator(IdGenerator): """Generate random span/trace IDs from a seed for deterministic tests. @@ -372,6 +377,7 @@ class SeededRandomIdGenerator(IdGenerator): """ seed: int | None = 0 + ms_timestamp_generator: Callable[[], int] = _ms_ts_generator def __post_init__(self) -> None: self.random = random.Random(self.seed) @@ -385,7 +391,7 @@ def generate_span_id(self) -> int: return span_id def generate_trace_id(self) -> int: - trace_id = ulid(self.random) + trace_id = ulid(self.random, self.ms_timestamp_generator) while trace_id == trace_api.INVALID_TRACE_ID: # pragma: no cover - trace_id = ulid(self.random) + trace_id = ulid(self.random, self.ms_timestamp_generator) return trace_id diff --git a/tests/test_logfire.py b/tests/test_logfire.py index 2e42bbfd8..db0172573 100644 --- a/tests/test_logfire.py +++ b/tests/test_logfire.py @@ -8,7 +8,6 @@ from dataclasses import dataclass from functools import partial from logging import getLogger -from time import sleep from typing import Any, Callable from unittest.mock import patch @@ -3216,26 +3215,32 @@ def test_exit_ended_span(exporter: TestExporter): ) +_ns_currnet_ts = 0 + + +def incrementing_ms_ts_generator() -> int: + global _ns_currnet_ts + _ns_currnet_ts += 42 # some randon number that results in non-whole ms + return _ns_currnet_ts // 1_000_000 + + @pytest.mark.parametrize( 'id_generator', - [SeededRandomIdGenerator()], + [SeededRandomIdGenerator(ms_timestamp_generator=incrementing_ms_ts_generator)], ) def test_default_id_generator(exporter: TestExporter) -> None: """Test that SeededRandomIdGenerator generates trace and span ids without errors.""" - for i in range(32): + for i in range(1024): with ExitStack() as stack: - for j in range(8): + for j in range(32): stack.enter_context(logfire.span(f'span {i}:{j}')) - # make sure we get a mix of traces spread across ms - # test should take < 32 * 8 ms (0.25s) to run - sleep(1 / 1000 / 5) exported = exporter.exported_spans_as_dict() - # sanity check: there are 32 trace ids - assert len({export['context']['trace_id'] for export in exported}) == 32 + # sanity check: there are 1024 trace ids + assert len({export['context']['trace_id'] for export in exported}) == 1024 # sanity check: there are multiple milliseconds (first 6 bytes) - assert len({export['context']['trace_id'] >> 64 for export in exported}) >= 5 # 5 is an arbitrary number + assert len({export['context']['trace_id'] >> 64 for export in exported}) == snapshot(1015) # Check that trace ids are sortable and unique # We use ULIDs to generate trace ids, so they should be sortable. diff --git a/tests/test_tail_sampling.py b/tests/test_tail_sampling.py index a24b0d5bc..7bd6373e8 100644 --- a/tests/test_tail_sampling.py +++ b/tests/test_tail_sampling.py @@ -345,7 +345,7 @@ def test_background_rate(config_kwargs: dict[str, Any], exporter: TestExporter): # None of them meet the tail sampling criteria. for _ in range(1000): logfire.info('info') - assert len(exporter.exported_spans) == 100 + 299, len(exporter.exported_spans) + assert len(exporter.exported_spans) - 100 == snapshot(299) class TestSampler(Sampler): @@ -406,7 +406,7 @@ def test_raw_head_sampler_with_tail_sampling(config_kwargs: dict[str, Any], expo # None of them meet the tail sampling criteria. for _ in range(1000): logfire.info('info') - assert len(exporter.exported_spans) == 100 + 293, len(exporter.exported_spans) + assert len(exporter.exported_spans) - 100 == snapshot(293) def test_custom_head_and_tail(config_kwargs: dict[str, Any], exporter: TestExporter): From 29716d2479a6edb907ccfd9b71a9d9f53ae6f651 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 8 Jan 2025 12:11:03 -0500 Subject: [PATCH 16/18] fix --- logfire/_internal/config.py | 2 +- logfire/_internal/ulid.py | 9 +++++++-- logfire/_internal/utils.py | 14 +++++--------- tests/test_logfire.py | 2 +- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/logfire/_internal/config.py b/logfire/_internal/config.py index 604645345..7a248507b 100644 --- a/logfire/_internal/config.py +++ b/logfire/_internal/config.py @@ -582,7 +582,7 @@ def _load_configuration( # This is particularly for deserializing from a dict as in executors.py advanced = AdvancedOptions(**advanced) # type: ignore id_generator = advanced.id_generator - if isinstance(id_generator, dict) and list(id_generator.keys()) == ['seed']: # type: ignore # pragma: no branch + if isinstance(id_generator, dict) and list(id_generator.keys()) == ['seed', '_ms_timestamp_generator']: # type: ignore # pragma: no branch advanced.id_generator = SeededRandomIdGenerator(**id_generator) # type: ignore elif advanced is None: advanced = AdvancedOptions(base_url=param_manager.load_param('base_url')) diff --git a/logfire/_internal/ulid.py b/logfire/_internal/ulid.py index 92c0b0533..0d49a7b77 100644 --- a/logfire/_internal/ulid.py +++ b/logfire/_internal/ulid.py @@ -1,10 +1,15 @@ from __future__ import annotations from random import Random +from time import time from typing import Callable -def ulid(random: Random, ms_ts_genertor: Callable[[], int]) -> int: +def _ms_timestamp_generator() -> int: + return int(time() * 1000) + + +def ulid(random: Random, ms_timestamp_generator: Callable[[], int] | None) -> int: """Generate an integer ULID compatible with UUID v4. ULIDs as defined by the [spec](https://github.com/ulid/spec) look like this: @@ -33,7 +38,7 @@ def ulid(random: Random, ms_ts_genertor: Callable[[], int]) -> int: # Timestamp: first 6 bytes of the ULID (48 bits) # Note that it's not important that this timestamp is super precise or unique. # It just needs to be roughly monotonically increasing so that the ULID is sortable, at least for our purposes. - timestamp = ms_ts_genertor().to_bytes(6, byteorder='big') + timestamp = (ms_timestamp_generator or _ms_timestamp_generator)().to_bytes(6, byteorder='big') # Randomness: next 10 bytes of the ULID (80 bits) randomness = random.getrandbits(80).to_bytes(10, byteorder='big') # Convert to int and return diff --git a/logfire/_internal/utils.py b/logfire/_internal/utils.py index 0591b218c..b3974350e 100644 --- a/logfire/_internal/utils.py +++ b/logfire/_internal/utils.py @@ -9,7 +9,6 @@ from contextlib import contextmanager from dataclasses import dataclass from pathlib import Path -from time import time from types import TracebackType from typing import TYPE_CHECKING, Any, Callable, Dict, List, Mapping, Sequence, Tuple, TypedDict, TypeVar, Union @@ -360,11 +359,7 @@ def is_asgi_send_receive_span_name(name: str) -> bool: return name.endswith((' http send', ' http receive', ' websocket send', ' websocket receive')) -def _ms_ts_generator() -> int: - return int(time() * 1000) - - -@dataclass(repr=True) +@dataclass(repr=True, eq=True) class SeededRandomIdGenerator(IdGenerator): """Generate random span/trace IDs from a seed for deterministic tests. @@ -377,7 +372,8 @@ class SeededRandomIdGenerator(IdGenerator): """ seed: int | None = 0 - ms_timestamp_generator: Callable[[], int] = _ms_ts_generator + _ms_timestamp_generator: Callable[[], int] | None = None + """Private argument, do not set this directly.""" def __post_init__(self) -> None: self.random = random.Random(self.seed) @@ -391,7 +387,7 @@ def generate_span_id(self) -> int: return span_id def generate_trace_id(self) -> int: - trace_id = ulid(self.random, self.ms_timestamp_generator) + trace_id = ulid(self.random, self._ms_timestamp_generator) while trace_id == trace_api.INVALID_TRACE_ID: # pragma: no cover - trace_id = ulid(self.random, self.ms_timestamp_generator) + trace_id = ulid(self.random, self._ms_timestamp_generator) return trace_id diff --git a/tests/test_logfire.py b/tests/test_logfire.py index db0172573..7e799d138 100644 --- a/tests/test_logfire.py +++ b/tests/test_logfire.py @@ -3226,7 +3226,7 @@ def incrementing_ms_ts_generator() -> int: @pytest.mark.parametrize( 'id_generator', - [SeededRandomIdGenerator(ms_timestamp_generator=incrementing_ms_ts_generator)], + [SeededRandomIdGenerator(_ms_timestamp_generator=incrementing_ms_ts_generator)], ) def test_default_id_generator(exporter: TestExporter) -> None: """Test that SeededRandomIdGenerator generates trace and span ids without errors.""" From a478fa3504b1a8832c9277d284808556288c9bca Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 8 Jan 2025 12:13:24 -0500 Subject: [PATCH 17/18] simplify --- logfire/_internal/ulid.py | 9 ++------- logfire/_internal/utils.py | 7 ++++++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/logfire/_internal/ulid.py b/logfire/_internal/ulid.py index 0d49a7b77..65498814a 100644 --- a/logfire/_internal/ulid.py +++ b/logfire/_internal/ulid.py @@ -1,15 +1,10 @@ from __future__ import annotations from random import Random -from time import time from typing import Callable -def _ms_timestamp_generator() -> int: - return int(time() * 1000) - - -def ulid(random: Random, ms_timestamp_generator: Callable[[], int] | None) -> int: +def ulid(random: Random, ms_timestamp_generator: Callable[[], int]) -> int: """Generate an integer ULID compatible with UUID v4. ULIDs as defined by the [spec](https://github.com/ulid/spec) look like this: @@ -38,7 +33,7 @@ def ulid(random: Random, ms_timestamp_generator: Callable[[], int] | None) -> in # Timestamp: first 6 bytes of the ULID (48 bits) # Note that it's not important that this timestamp is super precise or unique. # It just needs to be roughly monotonically increasing so that the ULID is sortable, at least for our purposes. - timestamp = (ms_timestamp_generator or _ms_timestamp_generator)().to_bytes(6, byteorder='big') + timestamp = ms_timestamp_generator().to_bytes(6, byteorder='big') # Randomness: next 10 bytes of the ULID (80 bits) randomness = random.getrandbits(80).to_bytes(10, byteorder='big') # Convert to int and return diff --git a/logfire/_internal/utils.py b/logfire/_internal/utils.py index b3974350e..2504cabd4 100644 --- a/logfire/_internal/utils.py +++ b/logfire/_internal/utils.py @@ -9,6 +9,7 @@ from contextlib import contextmanager from dataclasses import dataclass from pathlib import Path +from time import time from types import TracebackType from typing import TYPE_CHECKING, Any, Callable, Dict, List, Mapping, Sequence, Tuple, TypedDict, TypeVar, Union @@ -359,6 +360,10 @@ def is_asgi_send_receive_span_name(name: str) -> bool: return name.endswith((' http send', ' http receive', ' websocket send', ' websocket receive')) +def _default_ms_timestamp_generator() -> int: + return int(time() * 1000) + + @dataclass(repr=True, eq=True) class SeededRandomIdGenerator(IdGenerator): """Generate random span/trace IDs from a seed for deterministic tests. @@ -372,7 +377,7 @@ class SeededRandomIdGenerator(IdGenerator): """ seed: int | None = 0 - _ms_timestamp_generator: Callable[[], int] | None = None + _ms_timestamp_generator: Callable[[], int] = _default_ms_timestamp_generator """Private argument, do not set this directly.""" def __post_init__(self) -> None: From 685cec18cc3445c30bc8d2aae2d704177733e80d Mon Sep 17 00:00:00 2001 From: Alex Hall Date: Wed, 8 Jan 2025 22:25:32 +0200 Subject: [PATCH 18/18] fix test --- tests/test_logfire.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_logfire.py b/tests/test_logfire.py index 7e799d138..d3a7c761c 100644 --- a/tests/test_logfire.py +++ b/tests/test_logfire.py @@ -4,7 +4,7 @@ import re import sys from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor -from contextlib import ExitStack, asynccontextmanager, contextmanager +from contextlib import asynccontextmanager, contextmanager from dataclasses import dataclass from functools import partial from logging import getLogger @@ -3220,7 +3220,7 @@ def test_exit_ended_span(exporter: TestExporter): def incrementing_ms_ts_generator() -> int: global _ns_currnet_ts - _ns_currnet_ts += 42 # some randon number that results in non-whole ms + _ns_currnet_ts += 420_000 # some randon number that results in non-whole ms return _ns_currnet_ts // 1_000_000 @@ -3231,24 +3231,24 @@ def incrementing_ms_ts_generator() -> int: def test_default_id_generator(exporter: TestExporter) -> None: """Test that SeededRandomIdGenerator generates trace and span ids without errors.""" for i in range(1024): - with ExitStack() as stack: - for j in range(32): - stack.enter_context(logfire.span(f'span {i}:{j}')) + logfire.info('log', i=i) exported = exporter.exported_spans_as_dict() # sanity check: there are 1024 trace ids assert len({export['context']['trace_id'] for export in exported}) == 1024 # sanity check: there are multiple milliseconds (first 6 bytes) - assert len({export['context']['trace_id'] >> 64 for export in exported}) == snapshot(1015) + assert len({export['context']['trace_id'] >> 80 for export in exported}) == snapshot(431) # Check that trace ids are sortable and unique # We use ULIDs to generate trace ids, so they should be sortable. sorted_by_trace_id = [ - export['name'] + export['attributes']['i'] # sort by trace_id and start_time so that if two trace ids were generated in the same ms and thus may sort randomly # we disambiguate with the start time - for export in sorted(exported, key=lambda span: (span['context']['trace_id'], span['start_time'])) + for export in sorted(exported, key=lambda span: (span['context']['trace_id'] >> 80, span['start_time'])) + ] + sorted_by_start_timestamp = [ + export['attributes']['i'] for export in sorted(exported, key=lambda span: span['start_time']) ] - sorted_by_start_timestamp = [export['name'] for export in sorted(exported, key=lambda span: span['start_time'])] assert sorted_by_trace_id == sorted_by_start_timestamp