Skip to content

Commit

Permalink
Replace safeTruncate with truncate
Browse files Browse the repository at this point in the history
  • Loading branch information
MrAlias committed Nov 22, 2024
1 parent 9bfa142 commit 345724e
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 28 deletions.
90 changes: 64 additions & 26 deletions sdk/trace/span.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,49 +345,87 @@ func truncateAttr(limit int, attr attribute.KeyValue) attribute.KeyValue {
if limit < 0 {
return attr
}

switch attr.Value.Type() {
case attribute.STRING:
if v := attr.Value.AsString(); len(v) > limit {
return attr.Key.String(safeTruncate(v, limit))
}
v := attr.Value.AsString()
return attr.Key.String(truncate(limit, v))
case attribute.STRINGSLICE:
v := attr.Value.AsStringSlice()
for i := range v {
if len(v[i]) > limit {
v[i] = safeTruncate(v[i], limit)
}
v[i] = truncate(limit, v[i])
}
return attr.Key.StringSlice(v)
}
return attr
}

// safeTruncate truncates the string and guarantees valid UTF-8 is returned.
func safeTruncate(input string, limit int) string {
if trunc, ok := safeTruncateValidUTF8(input, limit); ok {
return trunc
// truncate returns a truncated version of str such that it contains no more
// than the limit number of characters. Truncation is applied by returning the
// first limit number of valid characters contained in str.
//
// If limit is negative, it returns the original string.
//
// UTF-8 is supported. When truncating, all invalid characters are dropped
// before applying truncation.
//
// If str already contains no more than the limit number of bytes, it is
// returned unchanged. No invalid characters are removed.
func truncate(limit int, str string) string {
if limit < 0 || len(str) <= limit {
return str
}

// Optimistically, assume all valid UTF-8.
var b strings.Builder
count := 0
for i, c := range str {
if c != utf8.RuneError {
count++
if count > limit {
return str[:i]
}
continue
}

_, size := utf8.DecodeRuneInString(str[i:])
if size == 1 {
b.Grow(len(str) - 1)
b.WriteString(str[:i])
str = str[i:]
break
}
}

// Fast-path, no invalid input.
if b.Cap() == 0 {
return str
}
trunc, _ := safeTruncateValidUTF8(strings.ToValidUTF8(input, ""), limit)
return trunc
}

// safeTruncateValidUTF8 returns a copy of the input string safely truncated to
// limit. The truncation is ensured to occur at the bounds of complete UTF-8
// characters. If invalid encoding of UTF-8 is encountered, input is returned
// with false, otherwise, the truncated input will be returned with true.
func safeTruncateValidUTF8(input string, limit int) (string, bool) {
for cnt := 0; cnt <= limit; {
r, size := utf8.DecodeRuneInString(input[cnt:])
if r == utf8.RuneError {
return input, false
// Truncate while validating UTF-8.
for i := 0; i < len(str) && count < limit; {
c := str[i]
if c < utf8.RuneSelf {
// Optimization for single byte runes (common case).
b.WriteByte(c)
i++
count++
continue
}

if cnt+size > limit {
return input[:cnt], true
_, size := utf8.DecodeRuneInString(str[i:])
if size == 1 {
// We checked for all 1-byte runes above, this is a RuneError.
i++
continue
}
cnt += size

b.WriteString(str[i : i+size])
i += size
count++
}
return input, true

return b.String()
}

// End ends the span. This method does nothing if the span is already ended or
Expand Down
4 changes: 2 additions & 2 deletions sdk/trace/span_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ func TestTruncate(t *testing.T) {
for _, tt := range tests {
for _, g := range tt.groups {
t.Run(tt.name, func(t *testing.T) {
got := safeTruncate(g.input, g.limit)
got := truncate(g.limit, g.input)
assert.Equalf(
t, g.expected, got,
"input: %q([]rune%v))\ngot: %q([]rune%v)\nwant %q([]rune%v)",
Expand All @@ -337,7 +337,7 @@ func BenchmarkTruncate(b *testing.B) {
b.ReportAllocs()
var out string
for i := 0; i < b.N; i++ {
out = safeTruncate(input, limit)
out = truncate(limit, input)
}
_ = out
}
Expand Down

0 comments on commit 345724e

Please sign in to comment.