diff --git a/CHANGELOG.md b/CHANGELOG.md index e6dbb2731ed..2f181cfcccc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - `SimpleProcessor` in `go.opentelemetry.io/otel/sdk/log` synchronizes `OnEmit` calls. (#5666) - The `SimpleProcessor` type in `go.opentelemetry.io/otel/sdk/log` is no longer comparable. (#5693) - The `BatchProcessor` type in `go.opentelemetry.io/otel/sdk/log` is no longer comparable. (#5693) +- `NewMemberRaw`, `NewKeyProperty` and `NewKeyValuePropertyRaw` in `go.opentelemetry.io/otel/baggage` allow UTF-8 string in key. (#5132) ### Fixed diff --git a/baggage/baggage.go b/baggage/baggage.go index 69d7841b4c9..b3569e95e5c 100644 --- a/baggage/baggage.go +++ b/baggage/baggage.go @@ -44,9 +44,15 @@ type Property struct { // NewKeyProperty returns a new Property for key. // +// The passed key must be valid, non-empty UTF-8 string. // If key is invalid, an error will be returned. +// However, the specific Propagators that are used to transmit baggage entries across +// component boundaries may impose their own restrictions on Property key. +// For example, the W3C Baggage specification restricts the Property keys to strings that +// satisfy the token definition from RFC7230, Section 3.2.6. +// For maximum compatibility, alpha-numeric value are strongly recommended to be used as Property key. func NewKeyProperty(key string) (Property, error) { - if !validateKey(key) { + if !validateBaggageName(key) { return newInvalidProperty(), fmt.Errorf("%w: %q", errInvalidKey, key) } @@ -62,6 +68,10 @@ func NewKeyProperty(key string) (Property, error) { // Notice: Consider using [NewKeyValuePropertyRaw] instead // that does not require percent-encoding of the value. func NewKeyValueProperty(key, value string) (Property, error) { + if !validateKey(key) { + return newInvalidProperty(), fmt.Errorf("%w: %q", errInvalidKey, key) + } + if !validateValue(value) { return newInvalidProperty(), fmt.Errorf("%w: %q", errInvalidValue, value) } @@ -74,11 +84,20 @@ func NewKeyValueProperty(key, value string) (Property, error) { // NewKeyValuePropertyRaw returns a new Property for key with value. // -// The passed key must be compliant with W3C Baggage specification. +// The passed key must be valid, non-empty UTF-8 string. +// The passed value must be valid UTF-8 string. +// However, the specific Propagators that are used to transmit baggage entries across +// component boundaries may impose their own restrictions on Property key. +// For example, the W3C Baggage specification restricts the Property keys to strings that +// satisfy the token definition from RFC7230, Section 3.2.6. +// For maximum compatibility, alpha-numeric value are strongly recommended to be used as Property key. func NewKeyValuePropertyRaw(key, value string) (Property, error) { - if !validateKey(key) { + if !validateBaggageName(key) { return newInvalidProperty(), fmt.Errorf("%w: %q", errInvalidKey, key) } + if !validateBaggageValue(value) { + return newInvalidProperty(), fmt.Errorf("%w: %q", errInvalidValue, value) + } p := Property{ key: key, @@ -115,12 +134,15 @@ func (p Property) validate() error { return fmt.Errorf("invalid property: %w", err) } - if !validateKey(p.key) { + if !validateBaggageName(p.key) { return errFunc(fmt.Errorf("%w: %q", errInvalidKey, p.key)) } if !p.hasValue && p.value != "" { return errFunc(errors.New("inconsistent value")) } + if p.hasValue && !validateBaggageValue(p.value) { + return errFunc(fmt.Errorf("%w: %q", errInvalidValue, p.value)) + } return nil } @@ -138,7 +160,15 @@ func (p Property) Value() (string, bool) { // String encodes Property into a header string compliant with the W3C Baggage // specification. +// It would return empty string if the key is invalid with the W3C Baggage +// specification. This could happen for a UTF-8 key, as it may contain +// invalid characters. func (p Property) String() string { + // W3C Baggage specification does not allow percent-encoded keys. + if !validateKey(p.key) { + return "" + } + if p.hasValue { return fmt.Sprintf("%s%s%v", p.key, keyValueDelimiter, valueEscape(p.value)) } @@ -203,9 +233,14 @@ func (p properties) validate() error { // String encodes properties into a header string compliant with the W3C Baggage // specification. func (p properties) String() string { - props := make([]string, len(p)) - for i, prop := range p { - props[i] = prop.String() + props := make([]string, 0, len(p)) + for _, prop := range p { + s := prop.String() + + // Ignored empty properties. + if s != "" { + props = append(props, s) + } } return strings.Join(props, propertyDelimiter) } @@ -230,6 +265,10 @@ type Member struct { // Notice: Consider using [NewMemberRaw] instead // that does not require percent-encoding of the value. func NewMember(key, value string, props ...Property) (Member, error) { + if !validateKey(key) { + return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidKey, key) + } + if !validateValue(value) { return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidValue, value) } @@ -242,7 +281,13 @@ func NewMember(key, value string, props ...Property) (Member, error) { // NewMemberRaw returns a new Member from the passed arguments. // -// The passed key must be compliant with W3C Baggage specification. +// The passed key must be valid, non-empty UTF-8 string. +// The passed value must be valid UTF-8 string. +// However, the specific Propagators that are used to transmit baggage entries across +// component boundaries may impose their own restrictions on baggage key. +// For example, the W3C Baggage specification restricts the baggage keys to strings that +// satisfy the token definition from RFC7230, Section 3.2.6. +// For maximum compatibility, alpha-numeric value are strongly recommended to be used as baggage key. func NewMemberRaw(key, value string, props ...Property) (Member, error) { m := Member{ key: key, @@ -340,9 +385,12 @@ func (m Member) validate() error { return fmt.Errorf("%w: %q", errInvalidMember, m) } - if !validateKey(m.key) { + if !validateBaggageName(m.key) { return fmt.Errorf("%w: %q", errInvalidKey, m.key) } + if !validateBaggageValue(m.value) { + return fmt.Errorf("%w: %q", errInvalidValue, m.value) + } return m.properties.validate() } @@ -357,10 +405,15 @@ func (m Member) Properties() []Property { return m.properties.Copy() } // String encodes Member into a header string compliant with the W3C Baggage // specification. +// It would return empty string if the key is invalid with the W3C Baggage +// specification. This could happen for a UTF-8 key, as it may contain +// invalid characters. func (m Member) String() string { - // A key is just an ASCII string. A value is restricted to be - // US-ASCII characters excluding CTLs, whitespace, - // DQUOTE, comma, semicolon, and backslash. + // W3C Baggage specification does not allow percent-encoded keys. + if !validateKey(m.key) { + return "" + } + s := m.key + keyValueDelimiter + valueEscape(m.value) if len(m.properties) > 0 { s += propertyDelimiter + m.properties.String() @@ -554,14 +607,22 @@ func (b Baggage) Len() int { // String encodes Baggage into a header string compliant with the W3C Baggage // specification. +// It would ignore members where the member key is invalid with the W3C Baggage +// specification. This could happen for a UTF-8 key, as it may contain +// invalid characters. func (b Baggage) String() string { members := make([]string, 0, len(b.list)) for k, v := range b.list { - members = append(members, Member{ + s := Member{ key: k, value: v.Value, properties: fromInternalProperties(v.Properties), - }.String()) + }.String() + + // Ignored empty members. + if s != "" { + members = append(members, s) + } } return strings.Join(members, listDelimiter) } @@ -748,6 +809,24 @@ var safeKeyCharset = [utf8.RuneSelf]bool{ '~': true, } +// validateBaggageName checks if the string is a valid OpenTelemetry Baggage name. +// Baggage name is a valid, non-empty UTF-8 string. +func validateBaggageName(s string) bool { + if len(s) == 0 { + return false + } + + return utf8.ValidString(s) +} + +// validateBaggageValue checks if the string is a valid OpenTelemetry Baggage value. +// Baggage value is a valid UTF-8 strings. +// Empty string is also a valid UTF-8 string. +func validateBaggageValue(s string) bool { + return utf8.ValidString(s) +} + +// validateKey checks if the string is a valid W3C Baggage key. func validateKey(s string) bool { if len(s) == 0 { return false @@ -766,6 +845,7 @@ func validateKeyChar(c int32) bool { return c >= 0 && c < int32(utf8.RuneSelf) && safeKeyCharset[c] } +// validateValue checks if the string is a valid W3C Baggage value. func validateValue(s string) bool { for _, c := range s { if !validateValueChar(c) { diff --git a/baggage/baggage_test.go b/baggage/baggage_test.go index 146673846fc..7d164727128 100644 --- a/baggage/baggage_test.go +++ b/baggage/baggage_test.go @@ -124,12 +124,22 @@ func TestParsePropertyError(t *testing.T) { func TestNewKeyProperty(t *testing.T) { p, err := NewKeyProperty(" ") - assert.ErrorIs(t, err, errInvalidKey) - assert.Equal(t, Property{}, p) + assert.NoError(t, err) + assert.Equal(t, Property{key: " "}, p) p, err = NewKeyProperty("key") assert.NoError(t, err) assert.Equal(t, Property{key: "key"}, p) + + // UTF-8 key + p, err = NewKeyProperty("B% 💼") + assert.NoError(t, err) + assert.Equal(t, Property{key: "B% 💼"}, p) + + // Invalid UTF-8 key + p, err = NewKeyProperty(string([]byte{255})) + assert.ErrorIs(t, err, errInvalidKey) + assert.Equal(t, Property{}, p) } func TestNewKeyValueProperty(t *testing.T) { @@ -141,19 +151,46 @@ func TestNewKeyValueProperty(t *testing.T) { assert.ErrorIs(t, err, errInvalidValue) assert.Equal(t, Property{}, p) + // it won't use percent decoding for key + p, err = NewKeyValueProperty("%zzzzz", "value") + assert.NoError(t, err) + assert.Equal(t, Property{key: "%zzzzz", value: "value", hasValue: true}, p) + + // wrong value with wrong decoding + p, err = NewKeyValueProperty("key", "%zzzzz") + assert.ErrorIs(t, err, errInvalidValue) + assert.Equal(t, Property{}, p) + p, err = NewKeyValueProperty("key", "value") assert.NoError(t, err) assert.Equal(t, Property{key: "key", value: "value", hasValue: true}, p) + + // Percent-encoded value + p, err = NewKeyValueProperty("key", "%C4%85%C5%9B%C4%87") + assert.NoError(t, err) + assert.Equal(t, Property{key: "key", value: "ąść", hasValue: true}, p) } func TestNewKeyValuePropertyRaw(t *testing.T) { - p, err := NewKeyValuePropertyRaw(" ", "") + // Empty key + p, err := NewKeyValuePropertyRaw("", " ") assert.ErrorIs(t, err, errInvalidKey) assert.Equal(t, Property{}, p) - p, err = NewKeyValuePropertyRaw("key", "Witaj Świecie!") + // Empty value + // Empty string is also a valid UTF-8 string. + p, err = NewKeyValuePropertyRaw(" ", "") + assert.NoError(t, err) + assert.Equal(t, Property{key: " ", hasValue: true}, p) + + // Space value + p, err = NewKeyValuePropertyRaw(" ", " ") assert.NoError(t, err) - assert.Equal(t, Property{key: "key", value: "Witaj Świecie!", hasValue: true}, p) + assert.Equal(t, Property{key: " ", value: " ", hasValue: true}, p) + + p, err = NewKeyValuePropertyRaw("B% 💼", "Witaj Świecie!") + assert.NoError(t, err) + assert.Equal(t, Property{key: "B% 💼", value: "Witaj Świecie!", hasValue: true}, p) } func TestPropertyValidate(t *testing.T) { @@ -168,6 +205,10 @@ func TestPropertyValidate(t *testing.T) { p.hasValue = true assert.NoError(t, p.validate()) + + // Invalid value + p.value = string([]byte{255}) + assert.ErrorIs(t, p.validate(), errInvalidValue) } func TestNewEmptyBaggage(t *testing.T) { @@ -410,6 +451,24 @@ func TestBaggageParse(t *testing.T) { "foo": {Value: "ąść"}, }, }, + { + name: "encoded UTF-8 string in key", + in: "a=b,%C4%85%C5%9B%C4%87=%C4%85%C5%9B%C4%87", + want: baggage.List{ + "a": {Value: "b"}, + "%C4%85%C5%9B%C4%87": {Value: "ąść"}, + }, + }, + { + name: "encoded UTF-8 string in property", + in: "a=b,%C4%85%C5%9B%C4%87=%C4%85%C5%9B%C4%87;%C4%85%C5%9B%C4%87=%C4%85%C5%9B%C4%87", + want: baggage.List{ + "a": {Value: "b"}, + "%C4%85%C5%9B%C4%87": {Value: "ąść", Properties: []baggage.Property{ + {Key: "%C4%85%C5%9B%C4%87", HasValue: true, Value: "ąść"}, + }}, + }, + }, { name: "invalid member: empty", in: "foo=,,bar=", @@ -455,6 +514,11 @@ func TestBaggageParse(t *testing.T) { in: "foo=1;key=\\", err: errInvalidProperty, }, + { + name: "invalid property: improper url encoded value", + in: "foo=1;key=val%", + err: errInvalidProperty, + }, { name: "invalid baggage string: too large", in: tooLarge, @@ -664,6 +728,29 @@ func TestBaggageString(t *testing.T) { }, }, }, + { + // W3C does not allow percent-encoded keys. + // The baggage that has percent-encoded keys should be ignored. + name: "utf-8 key and value", + out: "foo=B%25%20%F0%9F%92%BC-2;foo-1=B%25%20%F0%9F%92%BC-4;foo-2", + baggage: baggage.List{ + "ąść": { + Value: "B% 💼", + Properties: []baggage.Property{ + {Key: "ąść-1", Value: "B% 💼-1", HasValue: true}, + {Key: "ąść-2"}, + }, + }, + "foo": { + Value: "B% 💼-2", + Properties: []baggage.Property{ + {Key: "ąść", Value: "B% 💼-3", HasValue: true}, + {Key: "foo-1", Value: "B% 💼-4", HasValue: true}, + {Key: "foo-2"}, + }, + }, + }, + }, } orderer := func(s string) string { @@ -921,6 +1008,10 @@ func TestMemberValidation(t *testing.T) { m.hasData = true assert.ErrorIs(t, m.validate(), errInvalidKey) + // Invalid UTF-8 in value + m.key, m.value = "k", string([]byte{255}) + assert.ErrorIs(t, m.validate(), errInvalidValue) + m.key, m.value = "k", "\\" assert.NoError(t, m.validate()) } @@ -942,6 +1033,24 @@ func TestNewMember(t *testing.T) { } assert.Equal(t, expected, m) + // it won't use percent decoding for key + key = "%3B" + m, err = NewMember(key, val, p) + assert.NoError(t, err) + expected = Member{ + key: key, + value: val, + properties: properties{{key: "foo"}}, + hasData: true, + } + assert.Equal(t, expected, m) + + // wrong value with invalid token + key = "k" + val = ";" + _, err = NewMember(key, val, p) + assert.ErrorIs(t, err, errInvalidValue) + // wrong value with wrong decoding val = "%zzzzz" _, err = NewMember(key, val, p) @@ -986,6 +1095,31 @@ func TestNewMemberRaw(t *testing.T) { assert.Equal(t, expected, m) } +func TestBaggageUTF8(t *testing.T) { + testCases := map[string]string{ + "ąść": "B% 💼", + + // Case sensitive + "a": "a", + "A": "A", + } + + var members []Member + for k, v := range testCases { + m, err := NewMemberRaw(k, v) + require.NoError(t, err) + + members = append(members, m) + } + + b, err := New(members...) + require.NoError(t, err) + + for k, v := range testCases { + assert.Equal(t, v, b.Member(k).Value()) + } +} + func TestPropertiesValidate(t *testing.T) { p := properties{{}} assert.ErrorIs(t, p.validate(), errInvalidKey) @@ -1053,7 +1187,7 @@ func BenchmarkString(b *testing.B) { addMember("key1", "val1") addMember("key2", " ;,%") - addMember("key3", "Witaj świecie!") + addMember("B% 💼", "Witaj świecie!") addMember("key4", strings.Repeat("Hello world!", 10)) bg, err := New(members...)