Skip to content

fix(tracing): truncate long span attributes (#13270) [backport 2.21] (DO NOT MERGE) #13811

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: 2.21
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ddtrace/_trace/_limits.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@

# NOTE(review): presumably the maximum length of a span meta (tag) key; its
# use is not visible in this hunk -- confirm against callers.
MAX_SPAN_META_KEY_LEN = 200
# Strings longer than this are truncated by the msgpack encoder before packing.
MAX_SPAN_META_VALUE_LEN = 25000
# Total length of a truncated string, including the trailing "<truncated>..." marker.
TRUNCATED_SPAN_ATTRIBUTE_LEN = 2500
32 changes: 20 additions & 12 deletions ddtrace/internal/_encoding.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ from ..constants import _ORIGIN_KEY as ORIGIN_KEY
from .constants import SPAN_LINKS_KEY
from .constants import SPAN_EVENTS_KEY
from .constants import MAX_UINT_64BITS
from .._trace._limits import MAX_SPAN_META_VALUE_LEN
from .._trace._limits import TRUNCATED_SPAN_ATTRIBUTE_LEN
from ..settings._agent import config as agent_config


DEF MSGPACK_ARRAY_LENGTH_PREFIX_SIZE = 5
Expand Down Expand Up @@ -92,6 +95,10 @@ cdef inline int array_prefix_size(stdint.uint32_t l):
return 3
return MSGPACK_ARRAY_LENGTH_PREFIX_SIZE

cdef inline object truncate_string(object string):
    # Shorten an over-long span attribute so that it can still be encoded.
    #
    # Falsy values (None, empty strings) and values whose length does not exceed
    # MAX_SPAN_META_VALUE_LEN are returned unchanged. Longer values are cut so
    # that the result, including the trailing marker, is exactly
    # TRUNCATED_SPAN_ATTRIBUTE_LEN items long.
    #
    # pack_text() also passes bytes-like values here. Appending a str marker to
    # bytes raises TypeError on Python 3, so the marker type follows the input.
    if not string or len(string) <= MAX_SPAN_META_VALUE_LEN:
        return string
    if isinstance(string, (bytes, bytearray)):
        marker = b"<truncated>..."
    else:
        marker = "<truncated>..."
    # Derive the slice width from the marker instead of hard-coding its length.
    return string[:TRUNCATED_SPAN_ATTRIBUTE_LEN - len(marker)] + marker

cdef inline int pack_bytes(msgpack_packer *pk, char *bs, Py_ssize_t l):
cdef int ret
Expand Down Expand Up @@ -129,31 +136,35 @@ cdef inline int pack_text(msgpack_packer *pk, object text) except? -1:

if PyBytesLike_Check(text):
L = len(text)
if L > ITEM_LIMIT:
if L > MAX_SPAN_META_VALUE_LEN:
PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(text).tp_name)
text = truncate_string(text)
L = len(text)
ret = msgpack_pack_raw(pk, L)
if ret == 0:
ret = msgpack_pack_raw_body(pk, <char *> text, L)
return ret

if PyUnicode_Check(text):
if len(text) > MAX_SPAN_META_VALUE_LEN:
text = truncate_string(text)
IF PY_MAJOR_VERSION >= 3:
ret = msgpack_pack_unicode(pk, text, ITEM_LIMIT)
ret = msgpack_pack_unicode(pk, text, MAX_SPAN_META_VALUE_LEN)
if ret == -2:
raise ValueError("unicode string is too large")
ELSE:
text = PyUnicode_AsEncodedString(text, "utf-8", NULL)
L = len(text)
if L > ITEM_LIMIT:
if L > MAX_SPAN_META_VALUE_LEN:
raise ValueError("unicode string is too large")
ret = msgpack_pack_raw(pk, L)
if ret == 0:
ret = msgpack_pack_raw_body(pk, <char *> text, L)

return ret

raise TypeError("Unhandled text type: %r" % type(text))


cdef class StringTable(object):
cdef dict _table
cdef stdint.uint32_t _next_id
Expand Down Expand Up @@ -220,7 +231,6 @@ cdef class ListStringTable(StringTable):
cdef class MsgpackStringTable(StringTable):
cdef msgpack_packer pk
cdef int max_size
cdef int _max_string_length
cdef int _sp_len
cdef stdint.uint32_t _sp_id
cdef object _lock
Expand All @@ -232,7 +242,6 @@ cdef class MsgpackStringTable(StringTable):
if self.pk.buf == NULL:
raise MemoryError("Unable to allocate internal buffer.")
self.max_size = max_size
self._max_string_length = int(0.1*max_size)
self.pk.length = MSGPACK_STRING_TABLE_LENGTH_PREFIX_SIZE
self._sp_len = 0
self._lock = threading.RLock()
Expand All @@ -248,15 +257,13 @@ cdef class MsgpackStringTable(StringTable):
cdef insert(self, object string):
cdef int ret

if len(string) > self._max_string_length:
string = "<dropped string of length %d because it's too long (max allowed length %d)>" % (
len(string), self._max_string_length
)
# Before inserting, truncate the string if it is greater than MAX_SPAN_META_VALUE_LEN
string = truncate_string(string)

if self.pk.length + len(string) > self.max_size:
raise ValueError(
"Cannot insert '%s': string table is full (current size: %d, max size: %d)." % (
string, self.pk.length, self.max_size
"Cannot insert '%s': string table is full (current size: %d, size after insert: %d, max size: %d)." % (
string, self.pk.length, (self.pk.length + len(string)), self.max_size
)
)

Expand Down Expand Up @@ -846,6 +853,7 @@ cdef class MsgpackEncoderV05(MsgpackEncoderBase):
raise

cdef inline int _pack_string(self, object string) except? -1:
    # Pack the string-table index of ``string`` as a uint32. The value is
    # truncated first, so over-long strings are indexed in their shortened form.
    return msgpack_pack_uint32(&self.pk, self._st._index(truncate_string(string)))

cdef void * get_dd_origin_ref(self, str dd_origin):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
fixes:
- |
tracing: Fixes an issue where span attributes were not truncated before encoding, leading to a runtime error and causing spans to be dropped.
Resource names, tag keys, and tag values longer than 25000 characters are now truncated to 2500 characters.
26 changes: 0 additions & 26 deletions tests/integration/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,32 +175,6 @@ def test_payload_too_large():
log.error.assert_not_called()


@skip_if_testagent
@pytest.mark.subprocess(
    env={
        "DD_TRACE_API_VERSION": "v0.5",
        "DD_TRACE_WRITER_BUFFER_SIZE_BYTES": str(FOUR_KB),
    }
)
def test_resource_name_too_large():
    # A resource name just over the per-string limit must not make finish()
    # raise; the encoder is expected to substitute a placeholder instead.
    import pytest

    from ddtrace.trace import tracer
    from tests.integration.test_integration import FOUR_KB

    assert tracer._writer._buffer_size == FOUR_KB

    span = tracer.trace("operation", service="foo")
    # Maximum string length is set to 10% of the maximum buffer size
    span.resource = "B" * int(0.1 * FOUR_KB + 1)
    try:
        span.finish()
    except ValueError:
        pytest.fail()

    encoded_spans, size = tracer._writer._encoder.encode()
    assert size == 1
    assert b"<dropped string of length 410 because it's too long (max allowed length 409)>" in encoded_spans


@parametrize_with_all_encodings
def test_large_payload_is_sent_without_warning_logs():
import mock
Expand Down
10 changes: 10 additions & 0 deletions tests/integration/test_integration_snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,3 +286,13 @@ def test_setting_span_tags_and_metrics_generates_no_error_logs():
s.set_metric("number2", 12.0)
s.set_metric("number3", "1")
s.finish()


@pytest.mark.parametrize("encoding", ["v0.4", "v0.5"])
@pytest.mark.snapshot()
def test_encode_span_with_large_string_attributes(encoding):
    # Snapshot a span whose name sits exactly at the 25000-character limit and
    # whose resource and tag key are one character over it.
    from ddtrace import tracer

    api_override = {"_trace_api": encoding}
    with override_global_config(api_override), tracer.trace(name="a" * 25000, resource="b" * 25001) as span:
        span.set_tag(key="c" * 25001, value="d" * 2000)

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Loading