Skip to content

Commit

Permalink
GODRIVER-3090 Optimize logging truncation for large documents (#1699)
Browse files Browse the repository at this point in the history
  • Loading branch information
timothy-kim-mongo authored Aug 6, 2024
1 parent 8f1eac5 commit b0d91d0
Show file tree
Hide file tree
Showing 14 changed files with 1,060 additions and 150 deletions.
155 changes: 152 additions & 3 deletions bson/raw_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ func BenchmarkRawString(b *testing.B) {
// Create 1KiB and 128B strings to exercise the string-heavy call paths in
// the "Raw.String" method.
var buf strings.Builder
for i := 0; i < 128; i++ {
for i := 0; i < 16000000; i++ {
buf.WriteString("abcdefgh")
}
str1k := buf.String()
Expand Down Expand Up @@ -405,26 +405,175 @@ func BenchmarkRawString(b *testing.B) {
Nested string
}
Key2 string
Key3 int64
Key3 []string
Key4 float64
}{
Key1: struct{ Nested string }{Nested: str1k},
Key2: str1k,
Key3: 1234567890,
Key3: []string{str1k, str1k, str1k, str1k},
Key4: 1234567890.123456789,
},
},
// Very voluminous document with hundreds of thousands of keys
{
description: "very_voluminous_document",
value: createVoluminousDocument(100000),
},
// Document containing large strings and values
{
description: "large_strings_and_values",
value: createLargeStringsDocument(10),
},
// Document with massive arrays that are large
{
description: "massive_arrays",
value: createMassiveArraysDocument(100000),
},
}

for _, tc := range cases {
b.Run(tc.description, func(b *testing.B) {
bs, err := Marshal(tc.value)
require.NoError(b, err)

b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = Raw(bs).String()
}
})

b.Run(tc.description+"_StringN", func(b *testing.B) {
bs, err := Marshal(tc.value)
require.NoError(b, err)

b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = bsoncore.Document(bs).StringN(1024) // Assuming you want to limit to 1024 bytes for this benchmark
}
})
}
}

func TestComplexDocuments_StringN(t *testing.T) {
testCases := []struct {
description string
n int
doc any
}{
{"n>0, massive array documents", 1000, createMassiveArraysDocument(1000)},
{"n>0, voluminous document with unique values", 1000, createUniqueVoluminousDocument(t, 1000)},
{"n>0, large single document", 1000, createLargeSingleDoc(t)},
{"n>0, voluminous document with arrays containing documents", 1000, createVoluminousArrayDocuments(t, 1000)},
}

for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
bson, _ := Marshal(tc.doc)
bsonDoc := bsoncore.Document(bson)

got := bsonDoc.StringN(tc.n)
assert.Equal(t, tc.n, len(got))
})
}
}

// createVoluminousDocument generates a document with a specified number of keys, simulating a very large document in terms of the number of keys.
func createVoluminousDocument(numKeys int) D {
d := make(D, numKeys)
for i := 0; i < numKeys; i++ {
d = append(d, E{Key: fmt.Sprintf("key%d", i), Value: "value"})
}
return d
}

// createLargeStringsDocument generates a document with large string values, simulating a document with very large data values.
func createLargeStringsDocument(sizeMB int) D {
largeString := strings.Repeat("a", sizeMB*1024*1024)
return D{
{Key: "largeString1", Value: largeString},
{Key: "largeString2", Value: largeString},
{Key: "largeString3", Value: largeString},
{Key: "largeString4", Value: largeString},
}
}

// createMassiveArraysDocument generates a document with massive arrays, simulating a document that contains large arrays of data.
func createMassiveArraysDocument(arraySize int) D {
massiveArray := make([]string, arraySize)
for i := 0; i < arraySize; i++ {
massiveArray[i] = "value"
}

return D{
{Key: "massiveArray1", Value: massiveArray},
{Key: "massiveArray2", Value: massiveArray},
{Key: "massiveArray3", Value: massiveArray},
{Key: "massiveArray4", Value: massiveArray},
}
}

// createUniqueVoluminousDocument creates a BSON document with multiple key value pairs and unique value types.
func createUniqueVoluminousDocument(t *testing.T, size int) bsoncore.Document {
t.Helper()

docs := make(D, size)

for i := 0; i < size; i++ {
docs = append(docs, E{
Key: "x", Value: NewObjectID(),
})
docs = append(docs, E{
Key: "z", Value: "y",
})
}

bsonData, err := Marshal(docs)
assert.NoError(t, err)

return bsoncore.Document(bsonData)
}

// createLargeSingleDoc creates a large single BSON document.
func createLargeSingleDoc(t *testing.T) bsoncore.Document {
t.Helper()

var b strings.Builder
b.Grow(1048577)

for i := 0; i < 1048577; i++ {
b.WriteByte(0)
}
s := b.String()

doc := D{
{Key: "x", Value: s},
}

bsonData, err := Marshal(doc)
assert.NoError(t, err)

return bsoncore.Document(bsonData)
}

// createVoluminousArrayDocuments creates a volumninous BSON document with arrays containing documents.
func createVoluminousArrayDocuments(t *testing.T, size int) bsoncore.Document {
t.Helper()

docs := make(D, size)

for i := 0; i < size; i++ {
docs = append(docs, E{
Key: "x", Value: NewObjectID(),
})
docs = append(docs, E{
Key: "z", Value: A{D{{Key: "x", Value: "y"}}},
})
}

bsonData, err := Marshal(docs)
assert.NoError(t, err)

return bsoncore.Document(bsonData)
}
40 changes: 40 additions & 0 deletions internal/bsoncoreutil/bsoncoreutil.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Copyright (C) MongoDB, Inc. 2024-present.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

package bsoncoreutil

// Truncate truncates a given string for a certain width
func Truncate(str string, width int) string {
if width == 0 {
return ""
}

if len(str) <= width {
return str
}

// Truncate the byte slice of the string to the given width.
newStr := str[:width]

// Check if the last byte is at the beginning of a multi-byte character.
// If it is, then remove the last byte.
if newStr[len(newStr)-1]&0xC0 == 0xC0 {
return newStr[:len(newStr)-1]
}

// Check if the last byte is a multi-byte character
if newStr[len(newStr)-1]&0xC0 == 0x80 {
// If it is, step back until you we are at the start of a character
for i := len(newStr) - 1; i >= 0; i-- {
if newStr[i]&0xC0 == 0xC0 {
// Truncate at the end of the character before the character we stepped back to
return newStr[:i]
}
}
}

return newStr
}
59 changes: 59 additions & 0 deletions internal/bsoncoreutil/bsoncoreutil_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright (C) MongoDB, Inc. 2024-present.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

package bsoncoreutil

import (
"testing"

"go.mongodb.org/mongo-driver/v2/internal/assert"
)

func TestTruncate(t *testing.T) {
t.Parallel()

for _, tcase := range []struct {
name string
arg string
width int
expected string
}{
{
name: "empty",
arg: "",
width: 0,
expected: "",
},
{
name: "short",
arg: "foo",
width: 1000,
expected: "foo",
},
{
name: "long",
arg: "foo bar baz",
width: 9,
expected: "foo bar b",
},
{
name: "multi-byte",
arg: "你好",
width: 4,
expected: "你",
},
} {
tcase := tcase

t.Run(tcase.name, func(t *testing.T) {
t.Parallel()

actual := Truncate(tcase.arg, tcase.width)
assert.Equal(t, tcase.expected, actual)
})
}

}
54 changes: 22 additions & 32 deletions internal/logger/logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ import (
"os"
"strconv"
"strings"

"go.mongodb.org/mongo-driver/v2/bson"
"go.mongodb.org/mongo-driver/v2/internal/bsoncoreutil"
"go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore"
)

// DefaultMaxDocumentLength is the default maximum number of bytes that can be
Expand Down Expand Up @@ -230,46 +234,32 @@ func selectComponentLevels(componentLevels map[Component]Level) map[Component]Le
return selected
}

// truncate will truncate a string to the given width, appending "..." to the
// end of the string if it is truncated. This routine is safe for multi-byte
// characters.
func truncate(str string, width uint) string {
if width == 0 {
return ""
}

if len(str) <= int(width) {
return str
// FormatDocument formats a BSON document or RawValue for logging. The document is truncated
// to the given width.
func FormatDocument(msg bson.Raw, width uint) string {
if len(msg) == 0 {
return "{}"
}

// Truncate the byte slice of the string to the given width.
newStr := str[:width]
str := bsoncore.Document(msg).StringN(int(width))

// Check if the last byte is at the beginning of a multi-byte character.
// If it is, then remove the last byte.
if newStr[len(newStr)-1]&0xC0 == 0xC0 {
return newStr[:len(newStr)-1] + TruncationSuffix
// If the last byte is not a closing bracket, then the document was truncated
if len(str) > 0 && str[len(str)-1] != '}' {
str += TruncationSuffix
}

// Check if the last byte is in the middle of a multi-byte character. If
// it is, then step back until we find the beginning of the character.
if newStr[len(newStr)-1]&0xC0 == 0x80 {
for i := len(newStr) - 1; i >= 0; i-- {
if newStr[i]&0xC0 == 0xC0 {
return newStr[:i] + TruncationSuffix
}
}
}

return newStr + TruncationSuffix
return str
}

// FormatMessage formats a BSON document for logging. The document is truncated
// FormatString formats a String for logging. The string is truncated
// to the given width.
func FormatMessage(msg string, width uint) string {
if len(msg) == 0 {
return "{}"
func FormatString(str string, width uint) string {
strTrunc := bsoncoreutil.Truncate(str, int(width))

// Checks if the string was truncating by comparing the lengths of the two strings.
if len(strTrunc) < len(str) {
strTrunc += TruncationSuffix
}

return truncate(msg, width)
return strTrunc
}
Loading

0 comments on commit b0d91d0

Please sign in to comment.