Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GODRIVER-3090 Optimize logging truncation for large documents #1699

Merged
merged 34 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
b76875c
GODRIVER-3090 Documents and Array
timothy-kim-mongo Jun 27, 2024
ae237c1
GODRIVER-3090
timothy-kim-mongo Jul 2, 2024
0c3ffc3
GODRIVER-3090 Benchmark Changes
timothy-kim-mongo Jul 3, 2024
019a3f0
GODRIVER-3090
timothy-kim-mongo Jul 3, 2024
9ac506c
GODRIVER-3090
timothy-kim-mongo Jul 10, 2024
8b0fffb
GODRIVER-3090 Implemented Logging Benchamark for Large Documents
timothy-kim-mongo Jul 11, 2024
b5594d4
Code Changes
timothy-kim-mongo Jul 12, 2024
404a0ba
Changes to ReadElement
timothy-kim-mongo Jul 12, 2024
4d09943
GODRIVER-3090 PR Changes
timothy-kim-mongo Jul 17, 2024
cea0ce1
codespell
timothy-kim-mongo Jul 17, 2024
3f451a2
codespell
timothy-kim-mongo Jul 17, 2024
ece7e4b
codespell
timothy-kim-mongo Jul 17, 2024
135fbf9
codespell
timothy-kim-mongo Jul 17, 2024
eabd7c3
codespell
timothy-kim-mongo Jul 17, 2024
9b76532
codespell
timothy-kim-mongo Jul 17, 2024
b32c7e2
Static Analysis
timothy-kim-mongo Jul 18, 2024
5f09448
GODRIVER-3090 PR Changes
timothy-kim-mongo Jul 22, 2024
7136ff3
GODRIVER-3090 codespell
timothy-kim-mongo Jul 22, 2024
ba4671e
GODRIVER-3090 PR Changes
timothy-kim-mongo Jul 24, 2024
dd52701
GODRIVER-3090 PR Changes
timothy-kim-mongo Jul 26, 2024
4558849
GODRIVER-3090 v2 import
timothy-kim-mongo Jul 26, 2024
82f3dce
GODRIVER-3090 PR Changes
timothy-kim-mongo Jul 30, 2024
76d8014
GODRIVER-3090 PR Changes
timothy-kim-mongo Jul 30, 2024
e976b8d
GODRIVER-3090 PR
timothy-kim-mongo Jul 30, 2024
81feb4f
GODRIVER-3090 PR Changes
timothy-kim-mongo Jul 31, 2024
16e33d4
GODRIVER-3090 PR Changes
timothy-kim-mongo Jul 31, 2024
7efcb73
GODRIVER-3090 PR Changes
timothy-kim-mongo Jul 31, 2024
bcfa73a
GODRIVER-3090 PR Changes
timothy-kim-mongo Jul 31, 2024
39e6d57
Merge branch 'master' into GODRIVER-3090
timothy-kim-mongo Jul 31, 2024
ea47181
GODRIVER-3090 PR Changes
timothy-kim-mongo Jul 31, 2024
33fe16e
GODRIVER-3090 PR Changes
timothy-kim-mongo Aug 1, 2024
64959f4
GODRIVER-3090 PR Changes
timothy-kim-mongo Aug 2, 2024
f7150fd
Merge branch 'master' into GODRIVER-3090
timothy-kim-mongo Aug 2, 2024
aba30d3
Merge branch 'master' into GODRIVER-3090
timothy-kim-mongo Aug 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 152 additions & 3 deletions bson/raw_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ func BenchmarkRawString(b *testing.B) {
// Create 1KiB and 128B strings to exercise the string-heavy call paths in
// the "Raw.String" method.
var buf strings.Builder
for i := 0; i < 128; i++ {
for i := 0; i < 16000000; i++ {
buf.WriteString("abcdefgh")
}
str1k := buf.String()
Expand Down Expand Up @@ -405,26 +405,175 @@ func BenchmarkRawString(b *testing.B) {
Nested string
}
Key2 string
Key3 int64
Key3 []string
Key4 float64
}{
Key1: struct{ Nested string }{Nested: str1k},
Key2: str1k,
Key3: 1234567890,
Key3: []string{str1k, str1k, str1k, str1k},
Key4: 1234567890.123456789,
},
},
// Very voluminous document with hundreds of thousands of keys
{
description: "very_voluminous_document",
value: createVoluminousDocument(100000),
},
// Document containing large strings and values
{
description: "large_strings_and_values",
value: createLargeStringsDocument(10),
},
// Document with massive arrays that are large
{
description: "massive_arrays",
value: createMassiveArraysDocument(100000),
},
}

for _, tc := range cases {
b.Run(tc.description, func(b *testing.B) {
bs, err := Marshal(tc.value)
require.NoError(b, err)

b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = Raw(bs).String()
}
})

b.Run(tc.description+"_StringN", func(b *testing.B) {
bs, err := Marshal(tc.value)
require.NoError(b, err)

b.ReportAllocs()
b.ResetTimer()
timothy-kim-mongo marked this conversation as resolved.
Show resolved Hide resolved
for i := 0; i < b.N; i++ {
_ = bsoncore.Document(bs).StringN(1024) // Assuming you want to limit to 1024 bytes for this benchmark
}
})
}
}

func TestComplexDocuments_StringN(t *testing.T) {
testCases := []struct {
description string
n int
doc any
}{
{"n>0, massive array documents", 1000, createMassiveArraysDocument(1000)},
{"n>0, voluminous document with unique values", 1000, createUniqueVoluminousDocument(t, 1000)},
{"n>0, large single document", 1000, createLargeSingleDoc(t)},
{"n>0, voluminous document with arrays containing documents", 1000, createVoluminousArrayDocuments(t, 1000)},
}

for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
bson, _ := Marshal(tc.doc)
bsonDoc := bsoncore.Document(bson)

got := bsonDoc.StringN(tc.n)
assert.Equal(t, tc.n, len(got))
})
}
}

// createVoluminousDocument generates a document with a specified number of keys, simulating a very large document in terms of the number of keys.
func createVoluminousDocument(numKeys int) D {
d := make(D, numKeys)
for i := 0; i < numKeys; i++ {
d = append(d, E{Key: fmt.Sprintf("key%d", i), Value: "value"})
}
return d
}

// createLargeStringsDocument generates a document with large string values, simulating a document with very large data values.
func createLargeStringsDocument(sizeMB int) D {
largeString := strings.Repeat("a", sizeMB*1024*1024)
return D{
{Key: "largeString1", Value: largeString},
{Key: "largeString2", Value: largeString},
{Key: "largeString3", Value: largeString},
{Key: "largeString4", Value: largeString},
}
}

// createMassiveArraysDocument generates a document with massive arrays, simulating a document that contains large arrays of data.
func createMassiveArraysDocument(arraySize int) D {
massiveArray := make([]string, arraySize)
for i := 0; i < arraySize; i++ {
massiveArray[i] = "value"
}

return D{
{Key: "massiveArray1", Value: massiveArray},
{Key: "massiveArray2", Value: massiveArray},
{Key: "massiveArray3", Value: massiveArray},
{Key: "massiveArray4", Value: massiveArray},
}
}

// createUniqueVoluminousDocument creates a BSON document with multiple key value pairs and unique value types.
func createUniqueVoluminousDocument(t *testing.T, size int) bsoncore.Document {
t.Helper()

docs := make(D, size)

for i := 0; i < size; i++ {
docs = append(docs, E{
Key: "x", Value: NewObjectID(),
})
docs = append(docs, E{
Key: "z", Value: "y",
})
}

bsonData, err := Marshal(docs)
assert.NoError(t, err)

return bsoncore.Document(bsonData)
}

// createLargeSingleDoc creates a large single BSON document.
func createLargeSingleDoc(t *testing.T) bsoncore.Document {
t.Helper()

var b strings.Builder
b.Grow(1048577)

for i := 0; i < 1048577; i++ {
b.WriteByte(0)
}
s := b.String()

doc := D{
{Key: "x", Value: s},
}

bsonData, err := Marshal(doc)
assert.NoError(t, err)

return bsoncore.Document(bsonData)
}

// createVoluminousArrayDocuments creates a volumninous BSON document with arrays containing documents.
func createVoluminousArrayDocuments(t *testing.T, size int) bsoncore.Document {
t.Helper()

docs := make(D, size)

for i := 0; i < size; i++ {
docs = append(docs, E{
Key: "x", Value: NewObjectID(),
})
docs = append(docs, E{
Key: "z", Value: A{D{{Key: "x", Value: "y"}}},
})
}

bsonData, err := Marshal(docs)
assert.NoError(t, err)

return bsoncore.Document(bsonData)
}
40 changes: 40 additions & 0 deletions internal/bsoncoreutil/bsoncoreutil.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Copyright (C) MongoDB, Inc. 2024-present.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

package bsoncoreutil

// Truncate truncates a given string for a certain width
func Truncate(str string, width int) string {
if width == 0 {
return ""
}

if len(str) <= width {
return str
}

// Truncate the byte slice of the string to the given width.
newStr := str[:width]

// Check if the last byte is at the beginning of a multi-byte character.
// If it is, then remove the last byte.
if newStr[len(newStr)-1]&0xC0 == 0xC0 {
return newStr[:len(newStr)-1]
}

// Check if the last byte is a multi-byte character
if newStr[len(newStr)-1]&0xC0 == 0x80 {
// If it is, step back until you we are at the start of a character
for i := len(newStr) - 1; i >= 0; i-- {
if newStr[i]&0xC0 == 0xC0 {
// Truncate at the end of the character before the character we stepped back to
return newStr[:i]
}
}
}

return newStr
}
59 changes: 59 additions & 0 deletions internal/bsoncoreutil/bsoncoreutil_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright (C) MongoDB, Inc. 2024-present.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

package bsoncoreutil

import (
"testing"

"go.mongodb.org/mongo-driver/v2/internal/assert"
)

func TestTruncate(t *testing.T) {
t.Parallel()

for _, tcase := range []struct {
name string
arg string
width int
expected string
}{
{
name: "empty",
arg: "",
width: 0,
expected: "",
},
{
name: "short",
arg: "foo",
width: 1000,
expected: "foo",
},
{
name: "long",
arg: "foo bar baz",
width: 9,
expected: "foo bar b",
},
{
name: "multi-byte",
arg: "你好",
width: 4,
expected: "你",
},
} {
tcase := tcase

t.Run(tcase.name, func(t *testing.T) {
t.Parallel()

actual := Truncate(tcase.arg, tcase.width)
assert.Equal(t, tcase.expected, actual)
})
}

}
54 changes: 22 additions & 32 deletions internal/logger/logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ import (
"os"
"strconv"
"strings"

"go.mongodb.org/mongo-driver/v2/bson"
"go.mongodb.org/mongo-driver/v2/internal/bsoncoreutil"
"go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore"
)

// DefaultMaxDocumentLength is the default maximum number of bytes that can be
Expand Down Expand Up @@ -230,46 +234,32 @@ func selectComponentLevels(componentLevels map[Component]Level) map[Component]Le
return selected
}

// truncate will truncate a string to the given width, appending "..." to the
// end of the string if it is truncated. This routine is safe for multi-byte
// characters.
func truncate(str string, width uint) string {
if width == 0 {
return ""
}

if len(str) <= int(width) {
return str
// FormatDocument formats a BSON document or RawValue for logging. The document is truncated
// to the given width.
func FormatDocument(msg bson.Raw, width uint) string {
if len(msg) == 0 {
return "{}"
}

// Truncate the byte slice of the string to the given width.
newStr := str[:width]
str := bsoncore.Document(msg).StringN(int(width))

// Check if the last byte is at the beginning of a multi-byte character.
// If it is, then remove the last byte.
if newStr[len(newStr)-1]&0xC0 == 0xC0 {
return newStr[:len(newStr)-1] + TruncationSuffix
// If the last byte is not a closing bracket, then the document was truncated
if len(str) > 0 && str[len(str)-1] != '}' {
str += TruncationSuffix
}

// Check if the last byte is in the middle of a multi-byte character. If
// it is, then step back until we find the beginning of the character.
if newStr[len(newStr)-1]&0xC0 == 0x80 {
for i := len(newStr) - 1; i >= 0; i-- {
if newStr[i]&0xC0 == 0xC0 {
return newStr[:i] + TruncationSuffix
}
}
}

return newStr + TruncationSuffix
return str
}

// FormatMessage formats a BSON document for logging. The document is truncated
// FormatString formats a String for logging. The string is truncated
// to the given width.
func FormatMessage(msg string, width uint) string {
if len(msg) == 0 {
return "{}"
func FormatString(str string, width uint) string {
strTrunc := bsoncoreutil.Truncate(str, int(width))

// Checks if the string was truncating by comparing the lengths of the two strings.
if len(strTrunc) < len(str) {
strTrunc += TruncationSuffix
}

return truncate(msg, width)
return strTrunc
}
Loading
Loading