Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add propagate_unsampled field to trace config #38223

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ message HttpConnectionManager {
UNESCAPE_AND_FORWARD = 4;
}

// [#next-free-field: 11]
// [#next-free-field: 12]
message Tracing {
option (udpa.annotations.versioning).previous_message_type =
"envoy.config.filter.network.http_connection_manager.v2.HttpConnectionManager.Tracing";
Expand Down Expand Up @@ -171,6 +171,10 @@ message HttpConnectionManager {
// Default: 100%
type.v3.Percent overall_sampling = 5;

// Whether tracing metadata should be propagated upstream when the request is not sampled.
// Default: true.
google.protobuf.BoolValue propagate_unsampled = 11;

// Whether to annotate spans with additional data. If true, spans will include logs for stream
// events.
bool verbose = 6;
Expand Down
5 changes: 5 additions & 0 deletions changelogs/current.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ new_features:
change: |
Added :ref:`max_metadata_size <envoy_v3_api_field_config.core.v3.Http2ProtocolOptions.max_metadata_size>` to make
HTTP/2 metadata limits configurable.
- area: http
change: |
Added :ref:`propagate_unsampled
<envoy_v3_api_field_extensions.filters.network.http_connection_manager.v3.HttpConnectionManager.Tracing.propagate_unsampled>`
to support not modifying the existing trace context when requests aren't sampled.
- area: tcp_proxy
change: |
Added support for :ref:`backoff_options <envoy_v3_api_field_extensions.filters.network.tcp_proxy.v3.TcpProxy.backoff_options>`
Expand Down
5 changes: 5 additions & 0 deletions envoy/tracing/trace_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ class ConnectionManagerTracingConfig : public TracingConfig {
*/
virtual bool spawnUpstreamSpan() const PURE;

/**
* @return whether to propagate span information for unsampled requests.
*/
virtual bool propagateUnsampled() const PURE;

/**
* @return true if spans should be annotated with more detailed information.
*/
Expand Down
4 changes: 4 additions & 0 deletions source/common/http/conn_manager_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1452,6 +1452,10 @@ void ConnectionManagerImpl::ActiveStream::traceRequest() {
ConnectionManagerImpl::chargeTracingStats(tracing_decision.reason,
connection_manager_.config_->tracingStats());

if (!connection_manager_tracing_config_->propagateUnsampled()) {
return;
}
Comment on lines +1455 to +1457
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry I don't really understand the intention of this change. Maybe the option is not named well. Can you explain in longer form what problem this is trying to solve?

/wait

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem we are trying to solve is configuring Envoy to not append trace headers with its sampling decision when it has decided not trace a request.

I'll use the following example as a motivation of why we want this behavior:
Envoy --> Service A --> Service B

When Envoy chooses not to trace a request in the context of OpenTelemetry, it forwards the request to A with the traceparent flags set to -00 signaling it did not trace the request. This triggers A's tracing library to always not sample the request, regardless of its configuration.

We have some use cases where we'd like A to sample at a higher rate than our load balancers/service mesh sidecars (e.g. financial services sample at 100%, load balancer samples at 1%). The approach I took in this PR was if Envoy decided to not trace a request, set the span as a null span which should:

  1. Not override the traceparent header when using the OpenTelemetry tracer
  2. Propagate existing traceparent headers (not sure about this one)

I agree the name is pretty poor; if you have any recommendations I'm all ears.


Tracing::HttpTraceContext trace_context(*request_headers_);
active_span_ = connection_manager_.tracer().startSpan(
*this, trace_context, filter_manager_.streamInfo(), tracing_decision);
Expand Down
12 changes: 9 additions & 3 deletions source/common/tracing/tracer_config_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ class ConnectionManagerTracingConfigImpl : public ConnectionManagerTracingConfig
tracing_config, overall_sampling, 10000, 10000)};
overall_sampling_.set_numerator(overall_sampling_numerator);
overall_sampling_.set_denominator(envoy::type::v3::FractionalPercent::TEN_THOUSAND);
propagate_unsampled_ = tracing_config.has_propagate_unsampled()
? tracing_config.propagate_unsampled().value()
: true;

verbose_ = tracing_config.verbose();
max_path_tag_length_ = PROTOBUF_GET_WRAPPED_OR_DEFAULT(tracing_config, max_path_tag_length,
Expand All @@ -82,11 +85,12 @@ class ConnectionManagerTracingConfigImpl : public ConnectionManagerTracingConfig
envoy::type::v3::FractionalPercent client_sampling,
envoy::type::v3::FractionalPercent random_sampling,
envoy::type::v3::FractionalPercent overall_sampling,
bool verbose, uint32_t max_path_tag_length)
bool propagate_unsampled, bool verbose,
uint32_t max_path_tag_length)
: operation_name_(operation_name), custom_tags_(custom_tags),
client_sampling_(client_sampling), random_sampling_(random_sampling),
overall_sampling_(overall_sampling), verbose_(verbose),
max_path_tag_length_(max_path_tag_length) {}
overall_sampling_(overall_sampling), propagate_unsampled_(propagate_unsampled),
verbose_(verbose), max_path_tag_length_(max_path_tag_length) {}

ConnectionManagerTracingConfigImpl() = default;

Expand All @@ -99,6 +103,7 @@ class ConnectionManagerTracingConfigImpl : public ConnectionManagerTracingConfig
const envoy::type::v3::FractionalPercent& getOverallSampling() const override {
return overall_sampling_;
}
bool propagateUnsampled() const override { return propagate_unsampled_; }
const Tracing::CustomTagMap& getCustomTags() const override { return custom_tags_; }

Tracing::OperationName operationName() const override { return operation_name_; }
Expand All @@ -113,6 +118,7 @@ class ConnectionManagerTracingConfigImpl : public ConnectionManagerTracingConfig
envoy::type::v3::FractionalPercent client_sampling_;
envoy::type::v3::FractionalPercent random_sampling_;
envoy::type::v3::FractionalPercent overall_sampling_;
bool propagate_unsampled_{};
bool verbose_{};
uint32_t max_path_tag_length_{};
bool spawn_upstream_span_{};
Expand Down
66 changes: 65 additions & 1 deletion test/common/http/conn_manager_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1576,7 +1576,7 @@ TEST_F(HttpConnectionManagerImplTest, StartAndFinishSpanNormalFlow) {
}
tracing_config_ = std::make_unique<TracingConnectionManagerConfig>(
TracingConnectionManagerConfig{Tracing::OperationName::Ingress, conn_tracing_tags, percent1,
percent2, percent1, false, 256});
percent2, percent1, true, false, 256});
NiceMock<Router::MockRouteTracing> route_tracing;
ON_CALL(route_tracing, getClientSampling()).WillByDefault(ReturnRef(percent1));
ON_CALL(route_tracing, getRandomSampling()).WillByDefault(ReturnRef(percent2));
Expand Down Expand Up @@ -1865,6 +1865,7 @@ TEST_F(HttpConnectionManagerImplTest, StartAndFinishSpanNormalFlowEgressDecorato
percent1,
percent2,
percent1,
true,
false,
256});

Expand Down Expand Up @@ -1949,6 +1950,7 @@ TEST_F(HttpConnectionManagerImplTest, StartAndFinishSpanNormalFlowEgressDecorato
percent1,
percent2,
percent1,
true,
false,
256});

Expand Down Expand Up @@ -2035,6 +2037,7 @@ TEST_F(HttpConnectionManagerImplTest, StartAndFinishSpanNormalFlowEgressDecorato
percent1,
percent2,
percent1,
true,
false,
256});

Expand Down Expand Up @@ -2113,6 +2116,7 @@ TEST_F(HttpConnectionManagerImplTest,
percent1,
percent2,
percent1,
true,
false,
256});

Expand Down Expand Up @@ -2209,6 +2213,66 @@ TEST_F(HttpConnectionManagerImplTest, NoHCMTracingConfigAndActiveSpanWouldBeNull
conn_manager_->onData(fake_input, false);
}

TEST_F(HttpConnectionManagerImplTest, SkipUnsampledSpan) {
setup();
envoy::type::v3::FractionalPercent zero_percent;
tracing_config_ = std::make_unique<TracingConnectionManagerConfig>(
TracingConnectionManagerConfig{Tracing::OperationName::Egress,
{{":method", requestHeaderCustomTag(":method")}},
zero_percent,
zero_percent,
zero_percent,
false,
false,
256});

std::shared_ptr<MockStreamDecoderFilter> filter(new NiceMock<MockStreamDecoderFilter>());

EXPECT_CALL(filter_factory_, createFilterChain(_))
.WillRepeatedly(Invoke([&](FilterChainManager& manager) -> bool {
auto factory = createDecoderFilterFactoryCb(filter);
manager.applyFilterFactoryCb({}, factory);
return true;
}));

// Treat request as internal, otherwise x-request-id header will be overwritten.
use_remote_address_ = false;
EXPECT_CALL(random_, uuid()).Times(0);

EXPECT_CALL(*codec_, dispatch(_))
.WillRepeatedly(Invoke([&](Buffer::Instance& data) -> Http::Status {
decoder_ = &conn_manager_->newStream(response_encoder_);

RequestHeaderMapPtr headers{
new TestRequestHeaderMapImpl{{":method", "GET"},
{":authority", "host"},
{":path", "/"},
{"x-request-id", "125a4afb-6f55-a4ba-ad80-413f09f48a28"}}};
decoder_->decodeHeaders(std::move(headers), true);

filter->callbacks_->streamInfo().setResponseCodeDetails("");
ResponseHeaderMapPtr response_headers{new TestResponseHeaderMapImpl{
{":status", "200"}, {"x-envoy-decorator-operation", "testOp"}}};
filter->callbacks_->encodeHeaders(std::move(response_headers), true, "details");

// Active span always be null span when sampling didn't occur.
EXPECT_EQ(&Tracing::NullSpan::instance(), &filter->callbacks_->activeSpan());
// Child span should also be null span.
Tracing::SpanPtr child_span = filter->callbacks_->activeSpan().spawnChild(
Tracing::EgressConfig::get(), "null_child", test_time_.systemTime());
Tracing::SpanPtr null_span = std::make_unique<Tracing::NullSpan>();
auto& child_span_ref = *child_span;
auto& null_span_ref = *null_span;
EXPECT_EQ(typeid(child_span_ref).name(), typeid(null_span_ref).name());

data.drain(4);
return Http::okStatus();
}));

Buffer::OwnedImpl fake_input("1234");
conn_manager_->onData(fake_input, false);
}

TEST_F(HttpConnectionManagerImplTest, TestAccessLog) {
static constexpr char remote_address[] = "0.0.0.0";
static constexpr char xff_address[] = "1.2.3.4";
Expand Down
1 change: 1 addition & 0 deletions test/common/http/conn_manager_impl_test_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ void HttpConnectionManagerImplMixin::setup(const SetupOpts& opts) {
percent1,
percent2,
percent1,
true,
false,
256});
}
Expand Down
2 changes: 1 addition & 1 deletion test/common/http/conn_manager_utility_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ class ConnectionManagerUtilityTest : public testing::Test {
percent2.set_numerator(10000);
percent2.set_denominator(envoy::type::v3::FractionalPercent::TEN_THOUSAND);
tracing_config_ = {
Tracing::OperationName::Ingress, {}, percent1, percent2, percent1, false, 256};
Tracing::OperationName::Ingress, {}, percent1, percent2, percent1, true, false, 256};
ON_CALL(config_, tracingConfig()).WillByDefault(Return(&tracing_config_));
ON_CALL(config_, localReply()).WillByDefault(ReturnRef(*local_reply_));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,7 @@ TEST_F(HttpConnectionManagerConfigTest, SamplingDefault) {
EXPECT_EQ(10000, config.tracingConfig()->overall_sampling_.numerator());
EXPECT_EQ(envoy::type::v3::FractionalPercent::TEN_THOUSAND,
config.tracingConfig()->overall_sampling_.denominator());
EXPECT_TRUE(config.tracingConfig()->propagate_unsampled_);
}

TEST_F(HttpConnectionManagerConfigTest, SamplingConfigured) {
Expand All @@ -566,6 +567,7 @@ TEST_F(HttpConnectionManagerConfigTest, SamplingConfigured) {
value: 2
overall_sampling:
value: 3
propagate_unsampled: false
http_filters:
- name: envoy.filters.http.router
typed_config:
Expand All @@ -587,6 +589,7 @@ TEST_F(HttpConnectionManagerConfigTest, SamplingConfigured) {
EXPECT_EQ(300, config.tracingConfig()->overall_sampling_.numerator());
EXPECT_EQ(envoy::type::v3::FractionalPercent::TEN_THOUSAND,
config.tracingConfig()->overall_sampling_.denominator());
EXPECT_FALSE(config.tracingConfig()->propagate_unsampled_);
}

TEST_F(HttpConnectionManagerConfigTest, FractionalSamplingConfigured) {
Expand Down Expand Up @@ -624,6 +627,7 @@ TEST_F(HttpConnectionManagerConfigTest, FractionalSamplingConfigured) {
EXPECT_EQ(30, config.tracingConfig()->overall_sampling_.numerator());
EXPECT_EQ(envoy::type::v3::FractionalPercent::TEN_THOUSAND,
config.tracingConfig()->overall_sampling_.denominator());
EXPECT_TRUE(config.tracingConfig()->propagate_unsampled_);
}

TEST_F(HttpConnectionManagerConfigTest, OverallSampling) {
Expand Down Expand Up @@ -682,6 +686,40 @@ TEST_F(HttpConnectionManagerConfigTest, OverallSampling) {
EXPECT_GE(1200, sampled_count);
}

TEST_F(HttpConnectionManagerConfigTest, PropagateUnsampledExplicitTrue) {
const std::string yaml_string = R"EOF(
stat_prefix: ingress_http
internal_address_config:
unix_sockets: true
route_config:
name: local_route
tracing:
propagate_unsampled: true
http_filters:
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
)EOF";

HttpConnectionManagerConfig config(parseHttpConnectionManagerFromYaml(yaml_string), context_,
date_provider_, route_config_provider_manager_,
&scoped_routes_config_provider_manager_, tracer_manager_,
filter_config_provider_manager_, creation_status_);
ASSERT_TRUE(creation_status_.ok());

EXPECT_EQ(100, config.tracingConfig()->client_sampling_.numerator());
EXPECT_EQ(Tracing::DefaultMaxPathTagLength, config.tracingConfig()->max_path_tag_length_);
EXPECT_EQ(envoy::type::v3::FractionalPercent::HUNDRED,
config.tracingConfig()->client_sampling_.denominator());
EXPECT_EQ(10000, config.tracingConfig()->random_sampling_.numerator());
EXPECT_EQ(envoy::type::v3::FractionalPercent::TEN_THOUSAND,
config.tracingConfig()->random_sampling_.denominator());
EXPECT_EQ(10000, config.tracingConfig()->overall_sampling_.numerator());
EXPECT_EQ(envoy::type::v3::FractionalPercent::TEN_THOUSAND,
config.tracingConfig()->overall_sampling_.denominator());
EXPECT_TRUE(config.tracingConfig()->propagate_unsampled_);
}

TEST_F(HttpConnectionManagerConfigTest, UnixSocketInternalAddress) {
const std::string yaml_string = R"EOF(
stat_prefix: ingress_http
Expand Down
Loading