Skip to content

Commit

Permalink
Fix non-ascii/utf8 encoding
Browse files (browse the repository at this point in the history)
  • Loading branch information
Yun-Kim committed Jan 17, 2025
1 parent 5d498f4 commit 3fab554
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 10 deletions.
4 changes: 2 additions & 2 deletions ddtrace/llmobs/_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,13 +155,13 @@ def _llmobs_span_event(cls, span: Span) -> Tuple[Dict[str, Any], bool]:
if span_kind == "llm" and span._get_ctx_item(INPUT_MESSAGES) is not None:
meta["input"]["messages"] = span._get_ctx_item(INPUT_MESSAGES)
if span._get_ctx_item(INPUT_VALUE) is not None:
meta["input"]["value"] = safe_json(span._get_ctx_item(INPUT_VALUE))
meta["input"]["value"] = safe_json(span._get_ctx_item(INPUT_VALUE), ensure_ascii=False)
if span_kind == "llm" and span._get_ctx_item(OUTPUT_MESSAGES) is not None:
meta["output"]["messages"] = span._get_ctx_item(OUTPUT_MESSAGES)
if span_kind == "embedding" and span._get_ctx_item(INPUT_DOCUMENTS) is not None:
meta["input"]["documents"] = span._get_ctx_item(INPUT_DOCUMENTS)
if span._get_ctx_item(OUTPUT_VALUE) is not None:
meta["output"]["value"] = safe_json(span._get_ctx_item(OUTPUT_VALUE))
meta["output"]["value"] = safe_json(span._get_ctx_item(OUTPUT_VALUE), ensure_ascii=False)
if span_kind == "retrieval" and span._get_ctx_item(OUTPUT_DOCUMENTS) is not None:
meta["output"]["documents"] = span._get_ctx_item(OUTPUT_DOCUMENTS)
if span._get_ctx_item(INPUT_PROMPT) is not None:
Expand Down
4 changes: 2 additions & 2 deletions ddtrace/llmobs/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,10 @@ def _unserializable_default_repr(obj):
return default_repr


def safe_json(obj):
def safe_json(obj, ensure_ascii=True):
if isinstance(obj, str):
return obj
try:
return json.dumps(obj, ensure_ascii=False, skipkeys=True, default=_unserializable_default_repr)
return json.dumps(obj, ensure_ascii=ensure_ascii, skipkeys=True, default=_unserializable_default_repr)
except Exception:
log.error("Failed to serialize object to JSON.", exc_info=True)
3 changes: 2 additions & 1 deletion ddtrace/llmobs/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,8 @@ def __init__(
headers = {}
clients = [] # type: List[WriterClientBase]
if is_agentless:
assert agentless_url, "agentless_url is required for agentless mode"
if not agentless_url:
raise ValueError("agentless_url is required for agentless mode")
clients.append(LLMObsAgentlessEventClient())
intake_url = agentless_url
headers["DD-API-KEY"] = config._dd_api_key
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
fixes:
- |
LLM Observability: This fix resolves an issue where annotating a span with non-UTF-8 input/output values resulted in encoding errors.
20 changes: 15 additions & 5 deletions tests/llmobs/test_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,18 +247,28 @@ def test_only_generate_span_events_from_llmobs_spans(tracer, llmobs_events):
assert llmobs_events[0] == expected_grandchild_llmobs_span


def test_utf_inputs_outputs(llmobs, llmobs_backend):
def test_utf_non_ascii_io(llmobs, llmobs_backend):
with llmobs.workflow() as workflow_span:
with llmobs.llm(model_name="gpt-3.5-turbo-0125") as llm_span:
llmobs.annotate(llm_span, input_data="안녕, 지금 몇 시야?")
llmobs.annotate(workflow_span, input_data="안녕, 지금 몇 시야?")
events = llmobs_backend.wait_for_num_events(num=1)
assert len(events) == 1
assert events[0]["spans"][0]["meta"]["input"]["messages"][0]["content"] == "안녕, 지금 몇 시야?"
assert events[0]["spans"][1]["meta"]["input"]["value"] == "안녕, 지금 몇 시야?"


def test_non_utf8_inputs_outputs(llmobs, llmobs_backend):
"""Test that latin1 encoded inputs and outputs are correctly decoded."""
with llmobs.llm(model_name="gpt-3.5-turbo-0125") as span:
llmobs.annotate(
span,
# uncomment to repro issue
# input_data="The first Super Bowl, which was formally known as the First AFL–NFL World Championship Game, was played on January 15, 1967.",
input_data="The first Super Bowl, which was formally known as the First AFL-NFL World Championship Game, was played on January 15, 1967.",
input_data="The first Super Bowl (aka First AFL–NFL World Championship Game), was played in 1967.",
)

events = llmobs_backend.wait_for_num_events(num=1)
assert len(events) == 1
assert (
events[0]["spans"][0]["meta"]["input"]["messages"][0]["content"]
== "The first Super Bowl, which was formally known as the First AFL-NFL World Championship Game, was played on January 15, 1967."
== "The first Super Bowl (aka First AFLNFL World Championship Game), was played in 1967."
)

0 comments on commit 3fab554

Please sign in to comment.