diff --git a/BUILD.bazel b/BUILD.bazel index a95c9d9d..3853a19c 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -113,6 +113,7 @@ cc_library( "include/datadog/span_matcher.h", "include/datadog/span_sampler_config.h", "include/datadog/string_view.h", + "include/datadog/tls_storage.h", "include/datadog/tracer.h", "include/datadog/tracer_config.h", "include/datadog/tracer_signature.h", diff --git a/README.md b/README.md index 310a1199..36c0efd9 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,14 @@ options is not needed. The `-ldd_trace_cpp` option is always needed. c++ -o my_app my_app.o -L/path/to/dd-trace-cpp/.install/lib -ldd_trace_cpp ``` +### Optional: Trace to Profile correlation +In order to correlate traces to profiles generated by the full host profiler[[1](https://github.com/DataDog/opentelemetry-ebpf-profiler), [2](https://github.com/DataDog/dd-otel-host-profiler)], +there are a couple of requirements for the build: + +[//]: # (TODO: add a link to the specification) +- The app must be built on Linux since the profiler leverages eBPF, a Linux kernel feature. +- The compiler used for the build must support the `-mtls-dialect={gnu2/desc}` flag (GCC-11+ and Clang-19+). + Test ---- Pass `-DDD_TRACE_BUILD_TESTING=1` to `cmake` to include the unit tests in the build. 
diff --git a/cmake/compiler/clang.cmake b/cmake/compiler/clang.cmake index c726e637..313ac404 100644 --- a/cmake/compiler/clang.cmake +++ b/cmake/compiler/clang.cmake @@ -5,6 +5,26 @@ add_library(dd_trace_cpp-specs INTERFACE) add_library(dd_trace::specs ALIAS dd_trace_cpp-specs) +if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + set(TLS_DIALECT desc) + elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64") + set(TLS_DIALECT gnu2) + else() + message(FATAL_ERROR "Only aarch64 and x86-64 are supported (found: ${CMAKE_SYSTEM_PROCESSOR})") + endif() + + include(CheckCompilerFlag) + check_compiler_flag(CXX "-mtls-dialect=${TLS_DIALECT}" TLS_DIALECT_OK) + if (TLS_DIALECT_OK) + target_compile_options(dd_trace_cpp-specs INTERFACE + -fPIC + -ftls-model=global-dynamic + -mtls-dialect=${TLS_DIALECT} + ) + endif() +endif() + target_compile_options(dd_trace_cpp-specs INTERFACE -Wall diff --git a/cmake/compiler/gcc.cmake b/cmake/compiler/gcc.cmake index 64cefe8f..a0ef271c 100644 --- a/cmake/compiler/gcc.cmake +++ b/cmake/compiler/gcc.cmake @@ -5,6 +5,26 @@ add_library(dd_trace_cpp-specs INTERFACE) add_library(dd_trace::specs ALIAS dd_trace_cpp-specs) +if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + set(TLS_DIALECT desc) + elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64") + set(TLS_DIALECT gnu2) + else() + message(FATAL_ERROR "Only aarch64 and x86-64 are supported (found: ${CMAKE_SYSTEM_PROCESSOR})") + endif() + + include(CheckCompilerFlag) + check_compiler_flag(CXX "-mtls-dialect=${TLS_DIALECT}" TLS_DIALECT_OK) + if (TLS_DIALECT_OK) + target_compile_options(dd_trace_cpp-specs INTERFACE + -fPIC + -ftls-model=global-dynamic + -mtls-dialect=${TLS_DIALECT} + ) + endif() +endif() + target_compile_options(dd_trace_cpp-specs INTERFACE -Wall diff --git a/include/datadog/environment.h b/include/datadog/environment.h index 
266737dc..efeabe1f 100644 --- a/include/datadog/environment.h +++ b/include/datadog/environment.h @@ -50,6 +50,7 @@ namespace environment { MACRO(DD_TRACE_TAGS_PROPAGATION_MAX_LENGTH) \ MACRO(DD_VERSION) \ MACRO(DD_TRACE_128_BIT_TRACEID_GENERATION_ENABLED) \ + MACRO(DD_TRACE_CORRELATE_FULL_HOST_PROFILES) \ MACRO(DD_TELEMETRY_HEARTBEAT_INTERVAL) \ MACRO(DD_TELEMETRY_METRICS_ENABLED) \ MACRO(DD_TELEMETRY_METRICS_INTERVAL_SECONDS) \ diff --git a/include/datadog/tls_storage.h b/include/datadog/tls_storage.h new file mode 100644 index 00000000..c0be0cbc --- /dev/null +++ b/include/datadog/tls_storage.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +#include +#include + +// Global struct used to expose thread-specific information. +// https://github.com/elastic/apm/blob/149cd3e39a77a58002344270ed2ad35357bdd02d/specs/agents/universal-profiling-integration.md#thread-local-storage-layout + +namespace datadog { +namespace tracing { +struct __attribute__((packed)) TLSStorage { + uint16_t layout_minor_version; + uint8_t valid; + uint8_t trace_present; + uint8_t trace_flags; + uint64_t trace_id_low; + uint64_t trace_id_high; + uint64_t span_id; + uint64_t transaction_id; +}; + +} // namespace tracing +} // namespace datadog diff --git a/include/datadog/trace_segment.h b/include/datadog/trace_segment.h index 4db61f85..cd31c7c9 100644 --- a/include/datadog/trace_segment.h +++ b/include/datadog/trace_segment.h @@ -102,6 +102,8 @@ class TraceSegment { const Optional& origin() const; Optional sampling_decision() const; + uint64_t local_root_id() const; + Logger& logger() const; // Inject trace context for the specified `span` into the specified `writer`. diff --git a/include/datadog/tracer.h b/include/datadog/tracer.h index 5c183795..7735ec5f 100644 --- a/include/datadog/tracer.h +++ b/include/datadog/tracer.h @@ -10,6 +10,10 @@ // obtained from a `TracerConfig` via the `finalize_config` function. See // `tracer_config.h`. 
+#ifdef __linux__ +#include +#endif + #include #include @@ -23,6 +27,12 @@ #include "tracer_config.h" #include "tracer_signature.h" +#ifdef __linux__ +extern const void* elastic_apm_profiling_correlation_process_storage_v1; +extern thread_local struct datadog::tracing::TLSStorage* + elastic_apm_profiling_correlation_tls_v1; +#endif + namespace datadog { namespace tracing { @@ -54,6 +64,7 @@ class Tracer { Baggage::Options baggage_opts_; bool baggage_injection_enabled_; bool baggage_extraction_enabled_; + bool correlate_full_host_profiles_; public: // Create a tracer configured using the specified `config`, and optionally: @@ -105,6 +116,9 @@ class Tracer { std::string config() const; private: +#ifdef __linux__ + void correlate(const Span& span); +#endif void store_config(); }; diff --git a/include/datadog/tracer_config.h b/include/datadog/tracer_config.h index ab8608d0..af4be8ca 100644 --- a/include/datadog/tracer_config.h +++ b/include/datadog/tracer_config.h @@ -133,6 +133,14 @@ struct TracerConfig { // the `DD_TRACE_128_BIT_TRACEID_GENERATION_ENABLED` environment variable. Optional generate_128bit_trace_ids; + // `correlate_full_host_profiles` indicates whether we want to correlate + // traces and spans with profiles generated by the eBPF full host profiler. + // This correlation only works on linux, due to the eBPF-based nature of + // the profiler. It implies writing some process-level and thread-level + // data in variables which the profiler will then read from the process's + // memory. + Optional correlate_full_host_profiles; + // `runtime_id` denotes the current run of the application in which the tracer // is embedded. If `runtime_id` is not specified, then it defaults to a // pseudo-randomly generated value. 
A server that contains multiple tracers, @@ -197,6 +205,7 @@ class FinalizedTracerConfig final { std::shared_ptr logger; bool log_on_startup; bool generate_128bit_trace_ids; + bool correlate_full_host_profiles; Optional runtime_id; Clock clock; std::string integration_name; diff --git a/include/datadog/tracer_signature.h b/include/datadog/tracer_signature.h index 042b4d37..e0ca8203 100644 --- a/include/datadog/tracer_signature.h +++ b/include/datadog/tracer_signature.h @@ -19,6 +19,12 @@ // polling the Datadog Agent. See // `RemoteConfigurationManager::process_response` in `remote_config.h`. +#ifdef __linux__ +#include +#include +#include +#endif + #include #include "runtime_id.h" @@ -31,6 +37,17 @@ namespace datadog { namespace tracing { +#ifdef __linux__ +namespace { +void write_utf8_string(std::vector& buffer, const std::string& str) { + uint32_t length = str.length(); + buffer.insert(buffer.end(), reinterpret_cast(&length), + reinterpret_cast(&length) + sizeof(length)); + buffer.insert(buffer.end(), str.begin(), str.end()); +} +} // namespace +#endif + struct TracerSignature { RuntimeID runtime_id; std::string default_service; @@ -47,6 +64,32 @@ struct TracerSignature { library_version(tracer_version), library_language("cpp"), library_language_version(DD_TRACE_STRINGIFY(__cplusplus), 6) {} + +#ifdef __linux__ + // The process correlation storage contains information needed to + // correlate traces to profiles generated by dd-otel-host-profiler. + const std::unique_ptr generate_process_correlation_storage() { + std::vector buffer; + + // Currently, layout minor version is 2 to differ from Elastic's + // version which includes a socket path. 
+ // Layout: + // https://github.com/elastic/apm/blob/149cd3e39a77a58002344270ed2ad35357bdd02d/specs/agents/universal-profiling-integration.md#process-storage-layout + uint16_t layout_minor_version = 2; + buffer.insert(buffer.end(), + reinterpret_cast(&layout_minor_version), + reinterpret_cast(&layout_minor_version) + + sizeof(layout_minor_version)); + + write_utf8_string(buffer, default_service); + write_utf8_string(buffer, default_environment); + write_utf8_string(buffer, runtime_id.string()); + + uint8_t* res = new uint8_t[buffer.size()]; + memcpy(res, buffer.data(), buffer.size()); + return std::make_unique(res); + } +#endif }; } // namespace tracing diff --git a/src/datadog/datadog_agent.cpp b/src/datadog/datadog_agent.cpp index bd0f5c0d..ac2d27d4 100644 --- a/src/datadog/datadog_agent.cpp +++ b/src/datadog/datadog_agent.cpp @@ -185,8 +185,11 @@ DatadogAgent::~DatadogAgent() { Expected DatadogAgent::send( std::vector>&& spans, const std::shared_ptr& response_handler) { - std::lock_guard lock(mutex_); - trace_chunks_.push_back(TraceChunk{std::move(spans), response_handler}); + { + std::lock_guard lock(mutex_); + trace_chunks_.push_back(TraceChunk{std::move(spans), response_handler}); + } + flush(); return nullopt; } diff --git a/src/datadog/span.cpp b/src/datadog/span.cpp index f7419841..f67ed67f 100644 --- a/src/datadog/span.cpp +++ b/src/datadog/span.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,16 @@ Span::~Span() { data_->duration = now - data_->start; } +#ifdef __linux__ + // When a span is finished, we must update the span_id to its parent's. 
+ if (elastic_apm_profiling_correlation_tls_v1 != nullptr && + parent_id().has_value()) { + elastic_apm_profiling_correlation_tls_v1->valid = 0; + elastic_apm_profiling_correlation_tls_v1->span_id = parent_id().value(); + elastic_apm_profiling_correlation_tls_v1->valid = 1; + } +#endif + trace_segment_->span_finished(); } diff --git a/src/datadog/trace_segment.cpp b/src/datadog/trace_segment.cpp index b7f74c0f..2d90c1cd 100644 --- a/src/datadog/trace_segment.cpp +++ b/src/datadog/trace_segment.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -137,6 +138,8 @@ Optional TraceSegment::sampling_decision() const { return sampling_decision_; } +uint64_t TraceSegment::local_root_id() const { return spans_.front()->span_id; } + Logger& TraceSegment::logger() const { return *logger_; } void TraceSegment::register_span(std::unique_ptr span) { @@ -255,6 +258,12 @@ void TraceSegment::span_finished() { } telemetry::counter::increment(metrics::tracer::trace_segments_closed); + +#ifdef __linux__ + // When all spans are finished, so is the current trace. 
+ if (elastic_apm_profiling_correlation_tls_v1 != nullptr) + elastic_apm_profiling_correlation_tls_v1->trace_present = 0; +#endif } void TraceSegment::override_sampling_priority(SamplingPriority priority) { diff --git a/src/datadog/tracer.cpp b/src/datadog/tracer.cpp index e0f34a1e..cc6383ad 100644 --- a/src/datadog/tracer.cpp +++ b/src/datadog/tracer.cpp @@ -13,6 +13,9 @@ #include #include +#ifdef __linux__ +#include +#endif #include "config_manager.h" #include "datadog_agent.h" @@ -30,6 +33,14 @@ #include "trace_sampler.h" #include "w3c_propagation.h" +#ifdef __linux__ +const void* elastic_apm_profiling_correlation_process_storage_v1 = nullptr; +thread_local struct datadog::tracing::TLSStorage* + elastic_apm_profiling_correlation_tls_v1 = nullptr; +thread_local std::unique_ptr tls_info_holder = + nullptr; +#endif + namespace datadog { namespace tracing { @@ -58,7 +69,15 @@ Tracer::Tracer(const FinalizedTracerConfig& config, tags_header_max_size_(config.tags_header_size), baggage_opts_(config.baggage_opts), baggage_injection_enabled_(false), - baggage_extraction_enabled_(false) { + baggage_extraction_enabled_(false), + correlate_full_host_profiles_(config.correlate_full_host_profiles) { +#ifdef __linux__ + // TODO: change the way this is done to handle programs that fork. 
+ if (correlate_full_host_profiles_) + elastic_apm_profiling_correlation_process_storage_v1 = + *signature_.generate_process_correlation_storage(); +#endif + telemetry::init(config.telemetry, logger_, config.http_client, config.event_scheduler, config.agent_url); if (config.report_hostname) { @@ -101,6 +120,33 @@ Tracer::Tracer(const FinalizedTracerConfig& config, store_config(); } +#ifdef __linux__ +void Tracer::correlate(const Span& span) { + // See Layout: + // https://github.com/elastic/apm/blob/149cd3e39a77a58002344270ed2ad35357bdd02d/specs/agents/universal-profiling-integration.md#thread-local-storage-layout + tls_info_holder = std::make_unique(); + elastic_apm_profiling_correlation_tls_v1 = tls_info_holder.get(); + + struct TLSStorage* tls_data = elastic_apm_profiling_correlation_tls_v1; + tls_data->valid = 0; + + tls_data->layout_minor_version = 1; + tls_data->trace_present = 1; // We are in a span so no errors + tls_data->trace_flags = + span.trace_segment().sampling_decision().has_value() && + (span.trace_segment().sampling_decision().value().priority > 0) + ? 
1 + : 0; + auto trace_id = span.trace_id(); + tls_data->trace_id_low = trace_id.low; + tls_data->trace_id_high = trace_id.high; + tls_data->span_id = span.id(); + tls_data->transaction_id = span.trace_segment().local_root_id(); + + tls_data->valid = 1; +} +#endif + std::string Tracer::config() const { // clang-format off auto config = nlohmann::json::object({ @@ -192,6 +238,11 @@ Span Tracer::create_span(const SpanConfig& config) { Span span{span_data_ptr, segment, [generator = generator_]() { return generator->span_id(); }, clock_}; + +#ifdef __linux__ + if (correlate_full_host_profiles_) correlate(span); +#endif + return span; } @@ -403,6 +454,11 @@ Expected Tracer::extract_span(const DictReader& reader, Span span{span_data_ptr, segment, [generator = generator_]() { return generator->span_id(); }, clock_}; + +#ifdef __linux__ + if (correlate_full_host_profiles_) correlate(span); +#endif + return span; } diff --git a/src/datadog/tracer_config.cpp b/src/datadog/tracer_config.cpp index 629cd130..bf685ae9 100644 --- a/src/datadog/tracer_config.cpp +++ b/src/datadog/tracer_config.cpp @@ -127,6 +127,10 @@ Expected load_tracer_env_config(Logger &logger) { lookup(environment::DD_TRACE_128_BIT_TRACEID_GENERATION_ENABLED)) { env_cfg.generate_128bit_trace_ids = !falsy(*enabled_env); } + if (auto enabled_env = + lookup(environment::DD_TRACE_CORRELATE_FULL_HOST_PROFILES)) { + env_cfg.correlate_full_host_profiles = !falsy(*enabled_env); + } // Baggage if (auto baggage_items_env = @@ -361,6 +365,11 @@ Expected finalize_config(const TracerConfig &user_config, ConfigMetadata(ConfigName::GENEREATE_128BIT_TRACE_IDS, to_string(final_config.generate_128bit_trace_ids), origin); + // Correlate with Full Host Profiles + final_config.correlate_full_host_profiles = + value_or(env_config->correlate_full_host_profiles, + user_config.correlate_full_host_profiles, false); + // Integration name & version final_config.integration_name = value_or( env_config->integration_name, 
user_config.integration_name, "datadog"); diff --git a/test/test_tracer.cpp b/test/test_tracer.cpp index 48dda9d1..78dad142 100644 --- a/test/test_tracer.cpp +++ b/test/test_tracer.cpp @@ -1566,6 +1566,48 @@ TEST_CASE("128-bit trace IDs") { REQUIRE(*high == trace_id.high); } +#ifdef __linux__ +TEST_CASE("correlate full host profiles") { + TracerConfig config; + config.service = "testsvc"; + config.collector = std::make_shared(); + config.logger = std::make_shared(); + + SECTION("is off by default") { + auto finalized_config = finalize_config(config); + REQUIRE(finalized_config); + Tracer tracer{*finalized_config}; + REQUIRE(elastic_apm_profiling_correlation_process_storage_v1 == nullptr); + } + + SECTION("is available when enabled") { + config.correlate_full_host_profiles = true; + auto finalized_config = finalize_config(config); + REQUIRE(finalized_config); + Tracer tracer{*finalized_config}; + REQUIRE(elastic_apm_profiling_correlation_process_storage_v1 != nullptr); + { + auto span = tracer.create_span(); + REQUIRE(elastic_apm_profiling_correlation_tls_v1 != nullptr); + REQUIRE(elastic_apm_profiling_correlation_tls_v1->trace_present == 1); + REQUIRE(elastic_apm_profiling_correlation_tls_v1->valid == 1); + REQUIRE( + elastic_apm_profiling_correlation_tls_v1->trace_flags == + (span.trace_segment().sampling_decision().has_value() && + (span.trace_segment().sampling_decision().value().priority > + 0) + ? 
1 + : 0)); + REQUIRE(elastic_apm_profiling_correlation_tls_v1->span_id != 0); + REQUIRE(elastic_apm_profiling_correlation_tls_v1->transaction_id != 0); + REQUIRE(elastic_apm_profiling_correlation_tls_v1->trace_id_high != 0); + REQUIRE(elastic_apm_profiling_correlation_tls_v1->trace_id_low != 0); + } + REQUIRE(elastic_apm_profiling_correlation_tls_v1->trace_present == 0); + } +} +#endif + TEST_CASE( "_dd.p.tid invalid or inconsistent with trace ID results in error tag") { struct TestCase { diff --git a/test/test_tracer_config.cpp b/test/test_tracer_config.cpp index 197ab9df..4a9f10cd 100644 --- a/test/test_tracer_config.cpp +++ b/test/test_tracer_config.cpp @@ -1316,6 +1316,87 @@ TRACER_CONFIG_TEST("configure 128-bit trace IDs") { } } +#ifdef __linux__ +TRACER_CONFIG_TEST("TracerConfig::correlate_full_host_profiles") { + TracerConfig config; + config.service = "testsvc"; + config.logger = std::make_shared(); + + SECTION("default is false") { + { + auto finalized = finalize_config(config); + REQUIRE(finalized); + Tracer tracer{*finalized}; + auto span = tracer.create_span(); + } + REQUIRE(elastic_apm_profiling_correlation_process_storage_v1 == nullptr); + REQUIRE(elastic_apm_profiling_correlation_tls_v1 == nullptr); + } + + SECTION("true enables correlation") { + { + config.correlate_full_host_profiles = true; + auto finalized = finalize_config(config); + REQUIRE(finalized); + Tracer tracer{*finalized}; + auto span = tracer.create_span(); + REQUIRE(elastic_apm_profiling_correlation_tls_v1 != nullptr); + REQUIRE(elastic_apm_profiling_correlation_tls_v1->trace_present == 1); + } + REQUIRE(elastic_apm_profiling_correlation_process_storage_v1 != nullptr); + REQUIRE(elastic_apm_profiling_correlation_tls_v1 != nullptr); + REQUIRE(elastic_apm_profiling_correlation_tls_v1->trace_present == 0); + // reset for next tests + elastic_apm_profiling_correlation_process_storage_v1 = nullptr; + elastic_apm_profiling_correlation_tls_v1 = nullptr; + } + + SECTION("overridden by 
DD_TRACE_CORRELATE_FULL_HOST_PROFILES") { + struct TestCase { + std::string name; + std::string dd_trace_correlate_full_host_profiles; + bool original_value; + bool correlate; + }; + + auto test_case = GENERATE(values({ + {"falsy override ('false')", "false", true, false}, + {"falsy override ('0')", "0", true, false}, + {"falsy consistent ('false')", "false", false, false}, + {"falsy consistent ('0')", "0", false, false}, + {"truthy override ('true')", "true", false, true}, + {"truthy override ('1')", "1", false, true}, + {"truthy consistent ('true')", "true", true, true}, + {"truthy consistent ('1')", "1", true, true}, + })); + + CAPTURE(test_case.name); + const EnvGuard guard{"DD_TRACE_CORRELATE_FULL_HOST_PROFILES", + test_case.dd_trace_correlate_full_host_profiles}; + config.correlate_full_host_profiles = test_case.original_value; + { + auto finalized = finalize_config(config); + REQUIRE(finalized); + Tracer tracer{*finalized}; + auto span = tracer.create_span(); + if (test_case.correlate) { + REQUIRE(elastic_apm_profiling_correlation_process_storage_v1 != + nullptr); + REQUIRE(elastic_apm_profiling_correlation_tls_v1 != nullptr); + REQUIRE(elastic_apm_profiling_correlation_tls_v1->trace_present == 1); + // reset for next tests + elastic_apm_profiling_correlation_process_storage_v1 = nullptr; + elastic_apm_profiling_correlation_tls_v1 = nullptr; + } else { + REQUIRE(elastic_apm_profiling_correlation_process_storage_v1 == + nullptr); + REQUIRE(elastic_apm_profiling_correlation_tls_v1 == nullptr); + } + } + } +} +#endif + TRACER_CONFIG_TEST("baggage") { TracerConfig config;