Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prototype for W3C Trace Context Level 2 support in TraceIDRatioBased sampler #5645

Closed
wants to merge 13 commits into from
6 changes: 3 additions & 3 deletions propagation/trace_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ func (tc TraceContext) Inject(ctx context.Context, carrier TextMapCarrier) {
carrier.Set(tracestateHeader, ts)
}

// Clear all flags other than the trace-context supported sampling bit.
flags := sc.TraceFlags() & trace.FlagsSampled
// Clear all flags other than the trace-context supported bits.
flags := sc.TraceFlags() & (trace.FlagsSampled | trace.FlagsRandom)

var sb strings.Builder
sb.Grow(2 + 32 + 16 + 2 + 3)
Expand Down Expand Up @@ -111,7 +111,7 @@ func (tc TraceContext) extract(carrier TextMapCarrier) trace.SpanContext {
}

// Clear all flags other than the trace-context supported bits.
scc.TraceFlags = trace.TraceFlags(opts[0]) & trace.FlagsSampled
scc.TraceFlags = trace.TraceFlags(opts[0]) & (trace.FlagsSampled | trace.FlagsRandom)

// Ignore the error returned here. Failure to parse tracestate MUST NOT
// affect the parsing of traceparent according to the W3C tracecontext
Expand Down
8 changes: 4 additions & 4 deletions propagation/trace_context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,14 @@ func TestExtractValidTraceContext(t *testing.T) {
}),
},
{
name: "future version sampled",
name: "future version sampled and random",
header: http.Header{
traceparent: []string{"02-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01"},
traceparent: []string{"02-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-03"},
},
sc: trace.NewSpanContext(trace.SpanContextConfig{
TraceID: traceID,
SpanID: spanID,
TraceFlags: trace.FlagsSampled,
TraceFlags: trace.FlagsSampled | trace.FlagsRandom,
Remote: true,
}),
},
Expand Down Expand Up @@ -290,7 +290,7 @@ func TestInjectValidTraceContext(t *testing.T) {
{
name: "unsupported trace flag bits dropped",
header: http.Header{
traceparent: []string{"00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01"},
traceparent: []string{"00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-03"},
},
sc: trace.NewSpanContext(trace.SpanContextConfig{
TraceID: traceID,
Expand Down
14 changes: 14 additions & 0 deletions sdk/trace/id_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,22 @@ type IDGenerator interface {
// must never be done outside of a new major release.
}

// W3CTraceContextIDGenerator is implemented by generators of TraceID and
// SpanID that declare compliance with the W3C Trace Context Level 2
// randomness requirements.
type W3CTraceContextIDGenerator interface {
// W3CTraceContextLevel2Random is a marker method: when implemented by
// a generator, it indicates that the generator meets the W3C Trace
// Context Level 2 randomness requirements.
W3CTraceContextLevel2Random()
}

// randomIDGenerator is the generator type asserted in this file to satisfy
// both IDGenerator and W3CTraceContextIDGenerator.
type randomIDGenerator struct {
// Embedded mutex; presumably serializes access to randSource — confirm
// against the NewSpanID / NewIDs bodies.
sync.Mutex
// randSource is the random source held by the generator; presumably the
// source the generated IDs are drawn from — TODO confirm.
randSource *rand.Rand
}

// Compile-time checks that *randomIDGenerator implements both generator
// interfaces.
var (
	_ IDGenerator                = (*randomIDGenerator)(nil)
	_ W3CTraceContextIDGenerator = (*randomIDGenerator)(nil)
)

// NewSpanID returns a non-zero span ID from a randomly-chosen sequence.
func (gen *randomIDGenerator) NewSpanID(ctx context.Context, traceID trace.TraceID) trace.SpanID {
Expand Down Expand Up @@ -72,6 +82,10 @@ func (gen *randomIDGenerator) NewIDs(ctx context.Context) (trace.TraceID, trace.
return tid, sid
}

// W3CTraceContextLevel2Random is an intentionally empty marker method. Its
// presence makes *randomIDGenerator satisfy W3CTraceContextIDGenerator,
// declaring that the generator meets the W3C Trace Context Level 2
// randomness requirement.
func (gen *randomIDGenerator) W3CTraceContextLevel2Random() {}

func defaultIDGenerator() IDGenerator {
gen := &randomIDGenerator{}
var rngSeed int64
Expand Down
234 changes: 220 additions & 14 deletions sdk/trace/sampling.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@ import (
"context"
"encoding/binary"
"fmt"
"math"
"strconv"
"strings"

"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
)
Expand Down Expand Up @@ -64,56 +68,258 @@ type SamplingResult struct {
}

// traceIDRatioSampler is the sampler returned by TraceIDRatioBased. It makes
// its decision by comparing a 56-bit randomness value against a fixed
// rejection threshold.
type traceIDRatioSampler struct {
	// threshold is the rejection threshold T. With R being the randomness
	// value of the trace:
	//
	//	select when T <= R
	//	drop   when T >  R
	//
	// Range is [0, 1<<56).
	threshold uint64

	// otts is the encoded OTel tracestate field, containing "th:<tvalue>".
	otts string

	// description is the string returned by Description.
	description string
}

// tracestateHasRandomness determines whether there is a "rv" sub-key
// in `otts` which is the OTel tracestate value (i.e., the top-level "ot" value).
func tracestateHasRandomness(otts string) (randomness uint64, hasRandom bool) {
var low int
if has := strings.HasPrefix(otts, "rv:"); has {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if has := strings.HasPrefix(otts, "rv:"); has {
if strings.HasPrefix(otts, "rv:") {

low = 3
} else if pos := strings.Index(otts, ";rv:"); pos > 0 {
low = pos + 4
} else {
return 0, false
}
if len(otts) < low+14 {
otel.Handle(fmt.Errorf("could not parse tracestate randomness: %q: %w", otts, strconv.ErrSyntax))
} else if len(otts) > low+14 && otts[low+14] != ';' {
otel.Handle(fmt.Errorf("could not parse tracestate randomness: %q: %w", otts, strconv.ErrSyntax))
} else {
randomIn := otts[low : low+14]
if rv, err := strconv.ParseUint(randomIn, 16, 64); err == nil {
randomness = rv
hasRandom = true
} else {
otel.Handle(fmt.Errorf("could not parse tracestate randomness: %q: %w", randomIn, err))
}
}
return
}

func (ts traceIDRatioSampler) ShouldSample(p SamplingParameters) SamplingResult {
psc := trace.SpanContextFromContext(p.ParentContext)
x := binary.BigEndian.Uint64(p.TraceID[8:16]) >> 1
if x < ts.traceIDUpperBound {
state := psc.TraceState()

existOtts := state.Get("ot")

var randomness uint64
var hasRandom bool
if existOtts != "" {
// When the OTel trace state field exists, we will
// inspect for a "rv", otherwise assume that the
// TraceID is random.
randomness, hasRandom = tracestateHasRandomness(existOtts)
}
if !hasRandom {
// Interpret the least-significant 8-bytes as an
// unsigned number, then zero the top 8 bits using
// randomnessMask, yielding the least-significant 56
// bits of randomness, as specified in W3C Trace
// Context Level 2.
randomness = binary.BigEndian.Uint64(p.TraceID[8:16]) & randomnessMask
}
if ts.threshold > randomness {
return SamplingResult{
Decision: RecordAndSample,
Tracestate: psc.TraceState(),
Decision: Drop,
Tracestate: state,
}
}

if mod, err := state.Insert("ot", combineTracestate(existOtts, ts.otts)); err == nil {
state = mod
} else {
otel.Handle(fmt.Errorf("could not update tracestate: %q", err))
}
return SamplingResult{
Decision: Drop,
Tracestate: psc.TraceState(),
Decision: RecordAndSample,
Tracestate: state,
}
}

// combineTracestate merges a single updated field into an existing OTel
// tracestate fragment, which is the value of a top-level "ot" tracestate
// vendor tag, and returns the combined fragment.
//
// incoming is formatted according to the OTel tracestate spec: two-byte keys
// separated from their values by a colon, with key-value pairs separated by
// semi-colons. updated must be a single field of the form `XX:value` with a
// two-character key.
//
// Results:
//   - incoming is empty: updated is returned as-is.
//   - updated is not of the form `XX:value`: incoming is returned unchanged,
//     rather than panicking or splicing in a corrupt field.
//   - incoming contains a field without a colon: incoming is treated as
//     invalid and discarded, and updated alone is returned.
//   - otherwise: every field of incoming is passed through in order, with
//     the value of any field whose key matches replaced; if no field
//     matches, updated is appended.
func combineTracestate(incoming, updated string) string {
	// The key is two bytes, so the colon sits at offset 2 and the value
	// starts at offset 3 of updated.
	const colonOffset = 2
	const valueOffset = colonOffset + 1

	if incoming == "" {
		return updated
	}

	// Sanity check: slicing below assumes the `XX:value` shape.
	if len(updated) < valueOffset || updated[colonOffset] != ':' {
		return incoming
	}
	upkey := updated[:colonOffset]
	upvalue := updated[valueOffset:]

	// Sized for the append case (incoming + ";" + updated), which also
	// covers a single in-place replacement.
	var out strings.Builder
	out.Grow(len(incoming) + 1 + len(updated))

	// Pass the parts of incoming through, except the field being updated,
	// which is rewritten if it is found.
	foundUp := false
	for count := 0; len(incoming) != 0; count++ {
		key, rest, hasCol := strings.Cut(incoming, ":")
		if !hasCol {
			// return the updated value, ignore invalid inputs
			return updated
		}
		value, next, _ := strings.Cut(rest, ";")

		if key == upkey {
			value = upvalue
			foundUp = true
		}
		if count != 0 {
			out.WriteString(";")
		}
		out.WriteString(key)
		out.WriteString(":")
		out.WriteString(value)

		incoming = next
	}
	if !foundUp {
		out.WriteString(";")
		out.WriteString(updated)
	}
	return out.String()
}

// Description returns the sampler's description field.
func (ts traceIDRatioSampler) Description() string {
return ts.description
}

const (
	// DefaultSamplingPrecision is the number of hexadecimal
	// digits of precision used to express the sampling probability.
	DefaultSamplingPrecision = 4

	// minSupportedProbability is the smallest probability that can be
	// encoded by this implementation. It equals 1/maxAdjustedCount,
	// i.e. 2^-56: one unit of the 56-bit randomness range.
	minSupportedProbability float64 = 1 / float64(maxAdjustedCount)

	// maxSupportedProbability is 1 - 2^-52, the largest probability
	// below 1.0 that is encoded as a non-zero rejection threshold.
	// Other ways to express this number:
	//
	//   0x1.ffffffffffffe0p-01
	//   0x0.fffffffffffff0p+00
	//
	// Note that this is not the float64 immediately below 1.0:
	// math.Nextafter(1.0, 0.0) is 1 - 2^-53. Sampling with probability 1
	// is still supported; it simply has no distinct threshold.
	maxSupportedProbability float64 = 1 - 0x1p-52

	// maxAdjustedCount is the inverse of the smallest
	// representable sampling probability, it is the number of
	// distinct 56 bit values.
	maxAdjustedCount uint64 = 1 << 56

	// randomnessMask is a mask that selects the least-significant
	// 56 bits of a uint64.
	randomnessMask uint64 = maxAdjustedCount - 1
)

// TraceIDRatioBased samples a given fraction of traces. Fractions >= 1 will
// always sample. Fractions < 0 are treated as zero. To respect the
// parent trace's `SampledFlag`, the `TraceIDRatioBased` sampler should be used
// as a delegate of a `Parent` sampler.
//
//nolint:revive // revive complains about stutter of `trace.TraceIDRatioBased`
func TraceIDRatioBased(fraction float64) Sampler {
if fraction >= 1 {
const (
maxp = 14 // maximum precision is 56 bits
defp = DefaultSamplingPrecision // default precision
hbits = 4 // bits per hex digit
)

if fraction > 1-0x1p-52 {
return AlwaysSample()
}

if fraction <= 0 {
fraction = 0
if fraction < minSupportedProbability {
return NeverSample()
}

// Calculate the amount of precision needed to encode the
// threshold with reasonable precision.
//
// 13 hex digits is the maximum reasonable precision, since
// that equals 52 bits, the number of bits in the float64
// significand.
//
// Frexp() normalizes both the fraction and one-minus the
// fraction, because more digits of precision are needed in
// both cases -- in these cases the threshold has all leading
// '0' or 'f' characters.
//
// We know that `exp <= 0`. If `exp <= -4`, there will be a
// leading hex `0` or `f`. For every multiple of -4, another
// leading `0` or `f` appears, so this raises precision
// accordingly.
_, expF := math.Frexp(fraction)
_, expR := math.Frexp(1 - fraction)
precision := min(maxp, max(defp+expF/-hbits, defp+expR/-hbits))

// Compute the threshold
scaled := uint64(math.Round(fraction * float64(maxAdjustedCount)))
threshold := maxAdjustedCount - scaled

// Round to the specified precision, if less than the maximum.
if shift := hbits * (maxp - precision); shift != 0 {
half := uint64(1) << (shift - 1)
threshold += half
threshold >>= shift
threshold <<= shift
}

// Add maxAdjustedCount so that leading-zeros are formatted by
// the strconv library after an artificial leading "1". Then,
// strip the leadingt "1", then remove trailing zeros.
tvalue := strings.TrimRight(strconv.FormatUint(maxAdjustedCount+threshold, 16)[1:], "0")

return &traceIDRatioSampler{
traceIDUpperBound: uint64(fraction * (1 << 63)),
description: fmt.Sprintf("TraceIDRatioBased{%g}", fraction),
threshold: threshold,
otts: fmt.Sprint("th:", tvalue),
description: fmt.Sprintf("TraceIDRatioBased{%g}", fraction),
}
}

type alwaysOnSampler struct{}

// ShouldSample always returns RecordAndSample. The returned tracestate is
// the parent's tracestate with "th:0" merged into its "ot" entry. If the
// tracestate cannot be updated, the error is reported through otel.Handle
// and the parent's tracestate is returned unmodified.
func (as alwaysOnSampler) ShouldSample(p SamplingParameters) SamplingResult {
	ts := trace.SpanContextFromContext(p.ParentContext).TraceState()
	// 100% sampling equals zero rejection threshold.
	if mod, err := ts.Insert("ot", combineTracestate(ts.Get("ot"), "th:0")); err == nil {
		ts = mod
	} else {
		otel.Handle(fmt.Errorf("could not update tracestate: %w", err))
	}
	return SamplingResult{
		Decision:   RecordAndSample,
		Tracestate: ts,
	}
}

Expand Down
Loading
Loading