diff --git a/.riot/requirements/16562eb.txt b/.riot/requirements/16562eb.txt new file mode 100644 index 00000000000..e2aac88c146 --- /dev/null +++ b/.riot/requirements/16562eb.txt @@ -0,0 +1,32 @@ +# +# This file is autogenerated by pip-compile with Python 3.7 +# by the following command: +# +# pip-compile --allow-unsafe --config=pyproject.toml --no-annotate --resolver=backtracking .riot/requirements/16562eb.in +# +attrs==24.2.0 +coverage[toml]==7.2.7 +exceptiongroup==1.2.2 +hypothesis==6.45.0 +idna==3.10 +importlib-metadata==6.7.0 +iniconfig==2.0.0 +mock==5.1.0 +multidict==6.0.5 +opentracing==2.4.0 +packaging==24.0 +pluggy==1.2.0 +pytest==7.4.4 +pytest-asyncio==0.21.1 +pytest-cov==4.1.0 +pytest-mock==3.11.1 +pyyaml==6.0.1 +six==1.17.0 +sortedcontainers==2.4.0 +tomli==2.0.1 +typing-extensions==4.7.1 +urllib3==1.26.20 +vcrpy==4.4.0 +wrapt==1.16.0 +yarl==1.9.4 +zipp==3.15.0 diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index 02d5fed7626..af9b09d3e02 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -41,6 +41,7 @@ from ddtrace.internal.atexit import register_on_exit_signal from ddtrace.internal.constants import SAMPLING_DECISION_TRACE_TAG_KEY from ddtrace.internal.constants import SPAN_API_DATADOG +from ddtrace.internal.core import dispatch from ddtrace.internal.dogstatsd import get_dogstatsd_client from ddtrace.internal.logger import get_logger from ddtrace.internal.peer_service.processor import PeerServiceProcessor @@ -849,7 +850,7 @@ def _start_span( for p in chain(self._span_processors, SpanProcessor.__processors__, self._deferred_processors): p.on_span_start(span) self._hooks.emit(self.__class__.start_span, span) - + dispatch("trace.span_start", (span,)) return span start_span = _start_span @@ -866,6 +867,8 @@ def _on_span_finish(self, span: Span) -> None: for p in chain(self._span_processors, SpanProcessor.__processors__, self._deferred_processors): p.on_span_finish(span) + dispatch("trace.span_finish", (span,)) + if log.isEnabledFor(logging.DEBUG): log.debug("finishing span %s (enabled:%s)", span._pprint(), self.enabled) diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py index 49815151118..cd4069b4094 100644 --- a/ddtrace/llmobs/_llmobs.py +++ b/ddtrace/llmobs/_llmobs.py @@ -3,7 +3,9 @@ import time from typing import Any from typing import Dict +from typing import List from typing import Optional +from typing import Tuple from typing import Union import ddtrace @@ -11,8 +13,12 @@ from ddtrace import config from ddtrace import patch from ddtrace._trace.context import Context +from ddtrace.constants import ERROR_MSG +from ddtrace.constants import ERROR_STACK +from ddtrace.constants import ERROR_TYPE from ddtrace.ext import SpanTypes from ddtrace.internal import atexit +from ddtrace.internal import core from ddtrace.internal import forksafe from ddtrace.internal._rand import rand64bits from ddtrace.internal.compat import ensure_text @@ -24,6 +30,7 @@ from ddtrace.internal.telemetry.constants import TELEMETRY_APM_PRODUCT from ddtrace.internal.utils.formats import asbool from ddtrace.internal.utils.formats import parse_tags_str +from ddtrace.llmobs import _constants as constants from ddtrace.llmobs._constants import ANNOTATIONS_CONTEXT_ID from ddtrace.llmobs._constants import INPUT_DOCUMENTS from ddtrace.llmobs._constants import INPUT_MESSAGES @@ -45,11 +52,11 @@ from ddtrace.llmobs._constants import SPAN_START_WHILE_DISABLED_WARNING from ddtrace.llmobs._constants import TAGS from ddtrace.llmobs._evaluators.runner import EvaluatorRunner -from ddtrace.llmobs._trace_processor import LLMObsTraceProcessor from ddtrace.llmobs._utils import AnnotationContext from ddtrace.llmobs._utils import _get_llmobs_parent_id from ddtrace.llmobs._utils import _get_ml_app from ddtrace.llmobs._utils import _get_session_id +from ddtrace.llmobs._utils import _get_span_name from ddtrace.llmobs._utils import _inject_llmobs_parent_id from ddtrace.llmobs._utils import safe_json from ddtrace.llmobs._utils import validate_prompt @@ -81,34 +88,157 @@ class LLMObs(Service): def __init__(self, tracer=None): super(LLMObs, self).__init__() self.tracer = tracer or ddtrace.tracer - self._llmobs_span_writer = None - self._llmobs_span_writer = LLMObsSpanWriter( is_agentless=config._llmobs_agentless_enabled, interval=float(os.getenv("_DD_LLMOBS_WRITER_INTERVAL", 1.0)), timeout=float(os.getenv("_DD_LLMOBS_WRITER_TIMEOUT", 5.0)), ) - self._llmobs_eval_metric_writer = LLMObsEvalMetricWriter( site=config._dd_site, api_key=config._dd_api_key, interval=float(os.getenv("_DD_LLMOBS_WRITER_INTERVAL", 1.0)), timeout=float(os.getenv("_DD_LLMOBS_WRITER_TIMEOUT", 5.0)), ) - self._evaluator_runner = EvaluatorRunner( interval=float(os.getenv("_DD_LLMOBS_EVALUATOR_INTERVAL", 1.0)), llmobs_service=self, ) - self._trace_processor = LLMObsTraceProcessor(self._llmobs_span_writer, self._evaluator_runner) forksafe.register(self._child_after_fork) self._annotations = [] self._annotation_context_lock = forksafe.RLock() - self.tracer.on_start_span(self._do_annotations) - def _do_annotations(self, span): + # Register hooks for span events + core.on("trace.span_start", self._do_annotations) + core.on("trace.span_finish", self._on_span_finish) + + def _on_span_finish(self, span): + if self.enabled and span.span_type == SpanTypes.LLM: + self._submit_llmobs_span(span) + + def _submit_llmobs_span(self, span: Span) -> None: + """Generate and submit an LLMObs span event to be sent to LLMObs.""" + span_event = None + is_llm_span = span._get_ctx_item(SPAN_KIND) == "llm" + is_ragas_integration_span = False + try: + span_event, is_ragas_integration_span = self._llmobs_span_event(span) + self._llmobs_span_writer.enqueue(span_event) + except (KeyError, TypeError): + log.error( + "Error generating LLMObs span event for span %s, likely due to malformed span", span, exc_info=True + ) + finally: + if not span_event or not is_llm_span or is_ragas_integration_span: + return + if self._evaluator_runner: + self._evaluator_runner.enqueue(span_event, span) + + @classmethod + def _llmobs_span_event(cls, span: Span) -> Tuple[Dict[str, Any], bool]: + """Span event object structure.""" + span_kind = span._get_ctx_item(SPAN_KIND) + if not span_kind: + raise KeyError("Span kind not found in span context") + meta: Dict[str, Any] = {"span.kind": span_kind, "input": {}, "output": {}} + if span_kind in ("llm", "embedding") and span._get_ctx_item(MODEL_NAME) is not None: + meta["model_name"] = span._get_ctx_item(MODEL_NAME) + meta["model_provider"] = (span._get_ctx_item(MODEL_PROVIDER) or "custom").lower() + meta["metadata"] = span._get_ctx_item(METADATA) or {} + if span._get_ctx_item(INPUT_PARAMETERS): + meta["input"]["parameters"] = span._get_ctx_item(INPUT_PARAMETERS) + if span_kind == "llm" and span._get_ctx_item(INPUT_MESSAGES) is not None: + meta["input"]["messages"] = span._get_ctx_item(INPUT_MESSAGES) + if span._get_ctx_item(INPUT_VALUE) is not None: + meta["input"]["value"] = safe_json(span._get_ctx_item(INPUT_VALUE)) + if span_kind == "llm" and span._get_ctx_item(OUTPUT_MESSAGES) is not None: + meta["output"]["messages"] = span._get_ctx_item(OUTPUT_MESSAGES) + if span_kind == "embedding" and span._get_ctx_item(INPUT_DOCUMENTS) is not None: + meta["input"]["documents"] = span._get_ctx_item(INPUT_DOCUMENTS) + if span._get_ctx_item(OUTPUT_VALUE) is not None: + meta["output"]["value"] = safe_json(span._get_ctx_item(OUTPUT_VALUE)) + if span_kind == "retrieval" and span._get_ctx_item(OUTPUT_DOCUMENTS) is not None: + meta["output"]["documents"] = span._get_ctx_item(OUTPUT_DOCUMENTS) + if span._get_ctx_item(INPUT_PROMPT) is not None: + prompt_json_str = span._get_ctx_item(INPUT_PROMPT) + if span_kind != "llm": + log.warning( + "Dropping prompt on non-LLM span kind, annotating prompts is only supported for LLM span kinds." + ) + else: + meta["input"]["prompt"] = prompt_json_str + if span.error: + meta.update( + { + ERROR_MSG: span.get_tag(ERROR_MSG), + ERROR_STACK: span.get_tag(ERROR_STACK), + ERROR_TYPE: span.get_tag(ERROR_TYPE), + } + ) + if not meta["input"]: + meta.pop("input") + if not meta["output"]: + meta.pop("output") + metrics = span._get_ctx_item(METRICS) or {} + ml_app = _get_ml_app(span) + + is_ragas_integration_span = False + + if ml_app.startswith(constants.RAGAS_ML_APP_PREFIX): + is_ragas_integration_span = True + + span._set_ctx_item(ML_APP, ml_app) + parent_id = str(_get_llmobs_parent_id(span) or "undefined") + + llmobs_span_event = { + "trace_id": "{:x}".format(span.trace_id), + "span_id": str(span.span_id), + "parent_id": parent_id, + "name": _get_span_name(span), + "start_ns": span.start_ns, + "duration": span.duration_ns, + "status": "error" if span.error else "ok", + "meta": meta, + "metrics": metrics, + } + session_id = _get_session_id(span) + if session_id is not None: + span._set_ctx_item(SESSION_ID, session_id) + llmobs_span_event["session_id"] = session_id + + llmobs_span_event["tags"] = cls._llmobs_tags( + span, ml_app, session_id, is_ragas_integration_span=is_ragas_integration_span + ) + return llmobs_span_event, is_ragas_integration_span + + @staticmethod + def _llmobs_tags( + span: Span, ml_app: str, session_id: Optional[str] = None, is_ragas_integration_span: bool = False + ) -> List[str]: + tags = { + "version": config.version or "", + "env": config.env or "", + "service": span.service or "", + "source": "integration", + "ml_app": ml_app, + "ddtrace.version": ddtrace.__version__, + "language": "python", + "error": span.error, + } + err_type = span.get_tag(ERROR_TYPE) + if err_type: + tags["error_type"] = err_type + if session_id: + tags["session_id"] = session_id + if is_ragas_integration_span: + tags[constants.RUNNER_IS_INTEGRATION_SPAN_TAG] = "ragas" + existing_tags = span._get_ctx_item(TAGS) + if existing_tags is not None: + tags.update(existing_tags) + return ["{}:{}".format(k, v) for k, v in tags.items()] + + def _do_annotations(self, span: Span) -> None: # get the current span context # only do the annotations if it matches the context if span.span_type != SpanTypes.LLM: # do this check to avoid the warning log in `annotate` @@ -120,20 +250,14 @@ def _do_annotations(self, span): if current_context_id == context_id: self.annotate(span, **annotation_kwargs) - def _child_after_fork(self): + def _child_after_fork(self) -> None: self._llmobs_span_writer = self._llmobs_span_writer.recreate() self._llmobs_eval_metric_writer = self._llmobs_eval_metric_writer.recreate() self._evaluator_runner = self._evaluator_runner.recreate() - self._trace_processor._span_writer = self._llmobs_span_writer - self._trace_processor._evaluator_runner = self._evaluator_runner if self.enabled: self._start_service() def _start_service(self) -> None: - tracer_filters = self.tracer._filters - if not any(isinstance(tracer_filter, LLMObsTraceProcessor) for tracer_filter in tracer_filters): - tracer_filters += [self._trace_processor] - self.tracer.configure(settings={"FILTERS": tracer_filters}) try: self._llmobs_span_writer.start() self._llmobs_eval_metric_writer.start() @@ -160,11 +284,7 @@ def _stop_service(self) -> None: except ServiceStatusError: log.debug("Error stopping LLMObs writers") - try: - forksafe.unregister(self._child_after_fork) - self.tracer.shutdown() - except Exception: - log.warning("Failed to shutdown tracer", exc_info=True) + forksafe.unregister(self._child_after_fork) @classmethod def enable( @@ -265,7 +385,6 @@ def disable(cls) -> None: cls._instance.stop() cls.enabled = False - cls._instance.tracer.deregister_on_start_span(cls._instance._do_annotations) telemetry_writer.product_activated(TELEMETRY_APM_PRODUCT.LLMOBS, False) log.debug("%s disabled", cls.__name__) diff --git a/ddtrace/llmobs/_trace_processor.py b/ddtrace/llmobs/_trace_processor.py deleted file mode 100644 index 231d53d7626..00000000000 --- a/ddtrace/llmobs/_trace_processor.py +++ /dev/null @@ -1,177 +0,0 @@ -from typing import Any -from typing import Dict -from typing import List -from typing import Optional -from typing import Tuple - -import ddtrace -from ddtrace import Span -from ddtrace import config -from ddtrace._trace.processor import TraceProcessor -from ddtrace.constants import ERROR_MSG -from ddtrace.constants import ERROR_STACK -from ddtrace.constants import ERROR_TYPE -from ddtrace.ext import SpanTypes -from ddtrace.internal.logger import get_logger -from ddtrace.llmobs._constants import INPUT_DOCUMENTS -from ddtrace.llmobs._constants import INPUT_MESSAGES -from ddtrace.llmobs._constants import INPUT_PARAMETERS -from ddtrace.llmobs._constants import INPUT_PROMPT -from ddtrace.llmobs._constants import INPUT_VALUE -from ddtrace.llmobs._constants import METADATA -from ddtrace.llmobs._constants import METRICS -from ddtrace.llmobs._constants import ML_APP -from ddtrace.llmobs._constants import MODEL_NAME -from ddtrace.llmobs._constants import MODEL_PROVIDER -from ddtrace.llmobs._constants import OUTPUT_DOCUMENTS -from ddtrace.llmobs._constants import OUTPUT_MESSAGES -from ddtrace.llmobs._constants import OUTPUT_VALUE -from ddtrace.llmobs._constants import RAGAS_ML_APP_PREFIX -from ddtrace.llmobs._constants import RUNNER_IS_INTEGRATION_SPAN_TAG -from ddtrace.llmobs._constants import SESSION_ID -from ddtrace.llmobs._constants import SPAN_KIND -from ddtrace.llmobs._constants import TAGS -from ddtrace.llmobs._utils import _get_llmobs_parent_id -from ddtrace.llmobs._utils import _get_ml_app -from ddtrace.llmobs._utils import _get_session_id -from ddtrace.llmobs._utils import _get_span_name -from ddtrace.llmobs._utils import safe_json - - -log = get_logger(__name__) - - -class LLMObsTraceProcessor(TraceProcessor): - """ - Processor that extracts LLM-type spans in a trace to submit as separate LLMObs span events to LLM Observability. - """ - - def __init__(self, llmobs_span_writer, evaluator_runner=None): - self._span_writer = llmobs_span_writer - self._evaluator_runner = evaluator_runner - - def process_trace(self, trace: List[Span]) -> Optional[List[Span]]: - if not trace: - return None - for span in trace: - if span.span_type == SpanTypes.LLM: - self.submit_llmobs_span(span) - return None if config._llmobs_agentless_enabled else trace - - def submit_llmobs_span(self, span: Span) -> None: - """Generate and submit an LLMObs span event to be sent to LLMObs.""" - span_event = None - is_llm_span = span._get_ctx_item(SPAN_KIND) == "llm" - is_ragas_integration_span = False - try: - span_event, is_ragas_integration_span = self._llmobs_span_event(span) - self._span_writer.enqueue(span_event) - except (KeyError, TypeError): - log.error("Error generating LLMObs span event for span %s, likely due to malformed span", span) - finally: - if not span_event or not is_llm_span or is_ragas_integration_span: - return - if self._evaluator_runner: - self._evaluator_runner.enqueue(span_event, span) - - def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]: - """Span event object structure.""" - span_kind = span._get_ctx_item(SPAN_KIND) - if not span_kind: - raise KeyError("Span kind not found in span context") - meta: Dict[str, Any] = {"span.kind": span_kind, "input": {}, "output": {}} - if span_kind in ("llm", "embedding") and span._get_ctx_item(MODEL_NAME) is not None: - meta["model_name"] = span._get_ctx_item(MODEL_NAME) - meta["model_provider"] = (span._get_ctx_item(MODEL_PROVIDER) or "custom").lower() - meta["metadata"] = span._get_ctx_item(METADATA) or {} - if span._get_ctx_item(INPUT_PARAMETERS): - meta["input"]["parameters"] = span._get_ctx_item(INPUT_PARAMETERS) - if span_kind == "llm" and span._get_ctx_item(INPUT_MESSAGES) is not None: - meta["input"]["messages"] = span._get_ctx_item(INPUT_MESSAGES) - if span._get_ctx_item(INPUT_VALUE) is not None: - meta["input"]["value"] = safe_json(span._get_ctx_item(INPUT_VALUE)) - if span_kind == "llm" and span._get_ctx_item(OUTPUT_MESSAGES) is not None: - meta["output"]["messages"] = span._get_ctx_item(OUTPUT_MESSAGES) - if span_kind == "embedding" and span._get_ctx_item(INPUT_DOCUMENTS) is not None: - meta["input"]["documents"] = span._get_ctx_item(INPUT_DOCUMENTS) - if span._get_ctx_item(OUTPUT_VALUE) is not None: - meta["output"]["value"] = safe_json(span._get_ctx_item(OUTPUT_VALUE)) - if span_kind == "retrieval" and span._get_ctx_item(OUTPUT_DOCUMENTS) is not None: - meta["output"]["documents"] = span._get_ctx_item(OUTPUT_DOCUMENTS) - if span._get_ctx_item(INPUT_PROMPT) is not None: - prompt_json_str = span._get_ctx_item(INPUT_PROMPT) - if span_kind != "llm": - log.warning( - "Dropping prompt on non-LLM span kind, annotating prompts is only supported for LLM span kinds." - ) - else: - meta["input"]["prompt"] = prompt_json_str - if span.error: - meta.update( - { - ERROR_MSG: span.get_tag(ERROR_MSG), - ERROR_STACK: span.get_tag(ERROR_STACK), - ERROR_TYPE: span.get_tag(ERROR_TYPE), - } - ) - if not meta["input"]: - meta.pop("input") - if not meta["output"]: - meta.pop("output") - metrics = span._get_ctx_item(METRICS) or {} - ml_app = _get_ml_app(span) - - is_ragas_integration_span = False - - if ml_app.startswith(RAGAS_ML_APP_PREFIX): - is_ragas_integration_span = True - - span._set_ctx_item(ML_APP, ml_app) - parent_id = str(_get_llmobs_parent_id(span) or "undefined") - - llmobs_span_event = { - "trace_id": "{:x}".format(span.trace_id), - "span_id": str(span.span_id), - "parent_id": parent_id, - "name": _get_span_name(span), - "start_ns": span.start_ns, - "duration": span.duration_ns, - "status": "error" if span.error else "ok", - "meta": meta, - "metrics": metrics, - } - session_id = _get_session_id(span) - if session_id is not None: - span._set_ctx_item(SESSION_ID, session_id) - llmobs_span_event["session_id"] = session_id - - llmobs_span_event["tags"] = self._llmobs_tags( - span, ml_app, session_id, is_ragas_integration_span=is_ragas_integration_span - ) - return llmobs_span_event, is_ragas_integration_span - - @staticmethod - def _llmobs_tags( - span: Span, ml_app: str, session_id: Optional[str] = None, is_ragas_integration_span: bool = False - ) -> List[str]: - tags = { - "version": config.version or "", - "env": config.env or "", - "service": span.service or "", - "source": "integration", - "ml_app": ml_app, - "ddtrace.version": ddtrace.__version__, - "language": "python", - "error": span.error, - } - err_type = span.get_tag(ERROR_TYPE) - if err_type: - tags["error_type"] = err_type - if session_id: - tags["session_id"] = session_id - if is_ragas_integration_span: - tags[RUNNER_IS_INTEGRATION_SPAN_TAG] = "ragas" - existing_tags = span._get_ctx_item(TAGS) - if existing_tags is not None: - tags.update(existing_tags) - return ["{}:{}".format(k, v) for k, v in tags.items()] diff --git a/ddtrace/llmobs/_utils.py b/ddtrace/llmobs/_utils.py index c1b1c4a776c..dd616db8bef 100644 --- a/ddtrace/llmobs/_utils.py +++ b/ddtrace/llmobs/_utils.py @@ -135,9 +135,12 @@ def _get_ml_app(span: Span) -> str: ml_app = span._get_ctx_item(ML_APP) if ml_app: return ml_app - nearest_llmobs_ancestor = _get_nearest_llmobs_ancestor(span) - if nearest_llmobs_ancestor: - ml_app = nearest_llmobs_ancestor._get_ctx_item(ML_APP) + llmobs_parent = _get_nearest_llmobs_ancestor(span) + while llmobs_parent: + ml_app = llmobs_parent._get_ctx_item(ML_APP) + if ml_app is not None: + return ml_app + llmobs_parent = _get_nearest_llmobs_ancestor(llmobs_parent) return ml_app or config._llmobs_ml_app or "unknown-ml-app" @@ -149,9 +152,12 @@ def _get_session_id(span: Span) -> Optional[str]: session_id = span._get_ctx_item(SESSION_ID) if session_id: return session_id - nearest_llmobs_ancestor = _get_nearest_llmobs_ancestor(span) - if nearest_llmobs_ancestor: - session_id = nearest_llmobs_ancestor._get_ctx_item(SESSION_ID) + llmobs_parent = _get_nearest_llmobs_ancestor(span) + while llmobs_parent: + session_id = llmobs_parent._get_ctx_item(SESSION_ID) + if session_id is not None: + return session_id + llmobs_parent = _get_nearest_llmobs_ancestor(llmobs_parent) return session_id diff --git a/releasenotes/notes/fix-llmobs-processor-4afd715a84323d32.yaml b/releasenotes/notes/fix-llmobs-processor-4afd715a84323d32.yaml new file mode 100644 index 00000000000..5912a415022 --- /dev/null +++ b/releasenotes/notes/fix-llmobs-processor-4afd715a84323d32.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + LLM Observability: Resolves an issue where configuring custom trace filters/processors onto the tracer would disable LLM Observability. + Note that if LLM Observability is enabled in agentless mode, writing APM traces must be explicitly disabled by setting `DD_TRACE_ENABLED=0`. diff --git a/riotfile.py b/riotfile.py index 0d9f66ca925..0398175d930 100644 --- a/riotfile.py +++ b/riotfile.py @@ -2958,8 +2958,8 @@ def select_pys(min_version: str = MIN_PYTHON_VERSION, max_version: str = MAX_PYT name="llmobs", command="pytest {cmdargs} tests/llmobs", pkgs={"vcrpy": latest, "pytest-asyncio": "==0.21.1"}, - pys=select_pys(min_version="3.7"), venvs=[ + Venv(pys="3.7"), Venv(pys=select_pys(min_version="3.8"), pkgs={"ragas": "==0.1.21", "langchain": latest}), ], ), diff --git a/tests/llmobs/conftest.py b/tests/llmobs/conftest.py index a7d467b3985..15cffe5faa9 100644 --- a/tests/llmobs/conftest.py +++ b/tests/llmobs/conftest.py @@ -41,16 +41,6 @@ def mock_llmobs_span_writer(): patcher.stop() -@pytest.fixture -def mock_llmobs_span_agentless_writer(): - patcher = mock.patch("ddtrace.llmobs._llmobs.LLMObsSpanWriter") - LLMObsSpanWriterMock = patcher.start() - m = mock.MagicMock() - LLMObsSpanWriterMock.return_value = m - yield m - patcher.stop() - - @pytest.fixture def mock_llmobs_eval_metric_writer(): patcher = mock.patch("ddtrace.llmobs._llmobs.LLMObsEvalMetricWriter") @@ -85,10 +75,7 @@ def mock_llmobs_submit_evaluation(): def mock_http_writer_send_payload_response(): with mock.patch( "ddtrace.internal.writer.HTTPWriter._send_payload", - return_value=Response( - status=200, - body="{}", - ), + return_value=Response(status=200, body="{}"), ): yield @@ -124,9 +111,10 @@ def mock_evaluator_sampler_logs(): @pytest.fixture -def mock_http_writer_logs(): - with mock.patch("ddtrace.internal.writer.writer.log") as m: +def mock_llmobs_logs(): + with mock.patch("ddtrace.llmobs._llmobs.log") as m: yield m + m.reset_mock() @pytest.fixture @@ -154,7 +142,7 @@ def LLMObs( @pytest.fixture def AgentlessLLMObs( - mock_llmobs_span_agentless_writer, + mock_llmobs_span_writer, mock_llmobs_eval_metric_writer, mock_llmobs_evaluator_runner, ddtrace_global_config, @@ -191,16 +179,20 @@ def mock_ragas_dependencies_not_present(): @pytest.fixture def ragas(mock_llmobs_span_writer, mock_llmobs_eval_metric_writer): with override_global_config(dict(_dd_api_key="")): - import ragas - + try: + import ragas + except ImportError: + pytest.skip("Ragas not installed") with override_env(dict(OPENAI_API_KEY=os.getenv("OPENAI_API_KEY", ""))): yield ragas @pytest.fixture def reset_ragas_faithfulness_llm(): - import ragas - + try: + import ragas + except ImportError: + pytest.skip("Ragas not installed") previous_llm = ragas.metrics.faithfulness.llm yield ragas.metrics.faithfulness.llm = previous_llm @@ -243,16 +235,25 @@ def llmobs_span_writer(): @pytest.fixture -def llmobs(monkeypatch, tracer, llmobs_env, llmobs_span_writer): +def llmobs( + ddtrace_global_config, + monkeypatch, + tracer, + llmobs_env, + llmobs_span_writer, + mock_llmobs_eval_metric_writer, + mock_llmobs_evaluator_runner, +): for env, val in llmobs_env.items(): monkeypatch.setenv(env, val) - + global_config = default_global_config() + global_config.update(dict(_llmobs_ml_app=llmobs_env.get("DD_LLMOBS_ML_APP"))) + global_config.update(ddtrace_global_config) # TODO: remove once rest of tests are moved off of global config tampering - with override_global_config(dict(_llmobs_ml_app=llmobs_env.get("DD_LLMOBS_ML_APP"))): + with override_global_config(global_config): llmobs_service.enable(_tracer=tracer) llmobs_service._instance._llmobs_span_writer = llmobs_span_writer - llmobs_service._instance._trace_processor._span_writer = llmobs_span_writer - yield llmobs + yield llmobs_service llmobs_service.disable() diff --git a/tests/llmobs/test_llmobs.py b/tests/llmobs/test_llmobs.py index 1bae7efe9ed..6cf19fc3e2c 100644 --- a/tests/llmobs/test_llmobs.py +++ b/tests/llmobs/test_llmobs.py @@ -1,4 +1,3 @@ -import mock import pytest from ddtrace.ext import SpanTypes @@ -8,12 +7,6 @@ from tests.llmobs._utils import _expected_llmobs_llm_span_event -@pytest.fixture -def mock_logs(): - with mock.patch("ddtrace.llmobs._trace_processor.log") as mock_logs: - yield mock_logs - - class TestMLApp: @pytest.mark.parametrize("llmobs_env", [{"DD_LLMOBS_ML_APP": ""}]) def test_tag_defaults_to_env_var(self, tracer, llmobs_env, llmobs_events): @@ -228,19 +221,19 @@ def test_model_and_provider_are_set(tracer, llmobs_events): assert span_event["meta"]["model_provider"] == "model_provider" -def test_malformed_span_logs_error_instead_of_raising(mock_logs, tracer, llmobs_events): +def test_malformed_span_logs_error_instead_of_raising(tracer, llmobs_events, mock_llmobs_logs): """Test that a trying to create a span event from a malformed span will log an error instead of crashing.""" with tracer.trace("root_llm_span", span_type=SpanTypes.LLM) as llm_span: # span does not have SPAN_KIND tag pass - mock_logs.error.assert_called_once_with( - "Error generating LLMObs span event for span %s, likely due to malformed span", llm_span + mock_llmobs_logs.error.assert_called_with( + "Error generating LLMObs span event for span %s, likely due to malformed span", llm_span, exc_info=True ) assert len(llmobs_events) == 0 -def test_processor_only_creates_llmobs_span_event(tracer, llmobs_events): - """Test that the LLMObsTraceProcessor only creates LLMObs span events for LLM span types.""" +def test_only_generate_span_events_from_llmobs_spans(tracer, llmobs_events): + """Test that we only generate LLMObs span events for LLM span types.""" with tracer.trace("root_llm_span", service="tests.llmobs", span_type=SpanTypes.LLM) as root_span: root_span._set_ctx_item(const.SPAN_KIND, "llm") with tracer.trace("child_span"): @@ -250,5 +243,5 @@ def test_processor_only_creates_llmobs_span_event(tracer, llmobs_events): expected_grandchild_llmobs_span["parent_id"] = str(root_span.span_id) assert len(llmobs_events) == 2 - assert llmobs_events[0] == _expected_llmobs_llm_span_event(root_span, "llm") - assert llmobs_events[1] == expected_grandchild_llmobs_span + assert llmobs_events[1] == _expected_llmobs_llm_span_event(root_span, "llm") + assert llmobs_events[0] == expected_grandchild_llmobs_span diff --git a/tests/llmobs/test_llmobs_ragas_faithfulness_evaluator.py b/tests/llmobs/test_llmobs_ragas_faithfulness_evaluator.py index 1f78b538f24..ec8e181e527 100644 --- a/tests/llmobs/test_llmobs_ragas_faithfulness_evaluator.py +++ b/tests/llmobs/test_llmobs_ragas_faithfulness_evaluator.py @@ -11,6 +11,9 @@ from tests.llmobs._utils import _llm_span_with_expected_ragas_inputs_in_prompt +pytest.importorskip("ragas", reason="Tests require ragas to be available on user env") + + def _llm_span_without_io(): return _expected_llmobs_llm_span_event(Span("dummy")) @@ -167,19 +170,17 @@ def test_ragas_faithfulness_submits_evaluation_on_span_with_custom_keys(ragas, L @pytest.mark.vcr_logs -def test_ragas_faithfulness_emits_traces(ragas, LLMObs): - rf_evaluator = RagasFaithfulnessEvaluator(LLMObs) +def test_ragas_faithfulness_emits_traces(ragas, llmobs, llmobs_events): + rf_evaluator = RagasFaithfulnessEvaluator(llmobs) rf_evaluator.evaluate(_llm_span_with_expected_ragas_inputs_in_prompt()) - assert rf_evaluator.llmobs_service._instance._llmobs_span_writer.enqueue.call_count == 7 - calls = rf_evaluator.llmobs_service._instance._llmobs_span_writer.enqueue.call_args_list - - spans = [call[0][0] for call in calls] - + ragas_spans = [event for event in llmobs_events if event["name"].startswith("dd-ragas.")] + ragas_spans = sorted(ragas_spans, key=lambda d: d["start_ns"]) + assert len(ragas_spans) == 7 # check name, io, span kinds match - assert spans == _expected_ragas_spans() + assert ragas_spans == _expected_ragas_spans() # verify the trace structure - root_span = spans[0] + root_span = ragas_spans[0] root_span_id = root_span["span_id"] assert root_span["parent_id"] == "undefined" assert root_span["meta"] is not None @@ -187,16 +188,15 @@ def test_ragas_faithfulness_emits_traces(ragas, LLMObs): assert isinstance(root_span["meta"]["metadata"]["faithfulness_list"], list) assert isinstance(root_span["meta"]["metadata"]["statements"], list) root_span_trace_id = root_span["trace_id"] - for child_span in spans[1:]: + for child_span in ragas_spans[1:]: assert child_span["trace_id"] == root_span_trace_id - assert spans[1]["parent_id"] == root_span_id # input extraction (task) - assert spans[2]["parent_id"] == root_span_id # create statements (workflow) - assert spans[4]["parent_id"] == root_span_id # create verdicts (workflow) - assert spans[6]["parent_id"] == root_span_id # create score (task) - - assert spans[3]["parent_id"] == spans[2]["span_id"] # create statements prompt (task) - assert spans[5]["parent_id"] == spans[4]["span_id"] # create verdicts prompt (task) + assert ragas_spans[1]["parent_id"] == root_span_id # input extraction (task) + assert ragas_spans[2]["parent_id"] == root_span_id # create statements (workflow) + assert ragas_spans[4]["parent_id"] == root_span_id # create verdicts (workflow) + assert ragas_spans[6]["parent_id"] == root_span_id # create score (task) + assert ragas_spans[3]["parent_id"] == ragas_spans[2]["span_id"] # create statements prompt (task) + assert ragas_spans[5]["parent_id"] == ragas_spans[4]["span_id"] # create verdicts prompt (task) def test_llmobs_with_faithfulness_emits_traces_and_evals_on_exit(mock_writer_logs, run_python_code_in_subprocess): diff --git a/tests/llmobs/test_llmobs_service.py b/tests/llmobs/test_llmobs_service.py index 98748250c3a..2ba5754019f 100644 --- a/tests/llmobs/test_llmobs_service.py +++ b/tests/llmobs/test_llmobs_service.py @@ -7,9 +7,7 @@ import ddtrace from ddtrace._trace.context import Context -from ddtrace._trace.span import Span from ddtrace.ext import SpanTypes -from ddtrace.filters import TraceFilter from ddtrace.internal.service import ServiceStatus from ddtrace.llmobs import LLMObs as llmobs_service from ddtrace.llmobs._constants import INPUT_DOCUMENTS @@ -31,7 +29,6 @@ from ddtrace.llmobs._constants import SPAN_START_WHILE_DISABLED_WARNING from ddtrace.llmobs._constants import TAGS from ddtrace.llmobs._llmobs import SUPPORTED_LLMOBS_INTEGRATIONS -from ddtrace.llmobs._llmobs import LLMObsTraceProcessor from ddtrace.llmobs.utils import Prompt from tests.llmobs._utils import _expected_llmobs_eval_metric_event from tests.llmobs._utils import _expected_llmobs_llm_span_event @@ -41,20 +38,13 @@ from tests.utils import override_global_config -@pytest.fixture -def mock_logs(): - with mock.patch("ddtrace.llmobs._llmobs.log") as mock_logs: - yield mock_logs +RAGAS_AVAILABLE = os.getenv("RAGAS_AVAILABLE", False) def run_llmobs_trace_filter(dummy_tracer): - for trace_filter in dummy_tracer._filters: - if isinstance(trace_filter, LLMObsTraceProcessor): - root_llm_span = Span(name="span1", span_type=SpanTypes.LLM) - root_llm_span.set_tag_str(SPAN_KIND, "llm") - trace1 = [root_llm_span] - return trace_filter.process_trace(trace1) - raise ValueError("LLMObsTraceProcessor not found in tracer filters.") + with dummy_tracer.trace("span1", span_type=SpanTypes.LLM) as span: + span.set_tag_str(SPAN_KIND, "llm") + return dummy_tracer._writer.pop() def test_service_enable(): @@ -65,26 +55,11 @@ def test_service_enable(): assert llmobs_instance is not None assert llmobs_service.enabled assert llmobs_instance.tracer == dummy_tracer - assert any(isinstance(tracer_filter, LLMObsTraceProcessor) for tracer_filter in dummy_tracer._filters) assert run_llmobs_trace_filter(dummy_tracer) is not None llmobs_service.disable() -def test_service_enable_with_apm_disabled(monkeypatch): - with override_global_config(dict(_dd_api_key="", _llmobs_ml_app="")): - dummy_tracer = DummyTracer() - llmobs_service.enable(_tracer=dummy_tracer, agentless_enabled=True) - llmobs_instance = llmobs_service._instance - assert llmobs_instance is not None - assert llmobs_service.enabled - assert llmobs_instance.tracer == dummy_tracer - assert any(isinstance(tracer_filter, LLMObsTraceProcessor) for tracer_filter in dummy_tracer._filters) - assert run_llmobs_trace_filter(dummy_tracer) is None - - llmobs_service.disable() - - def test_service_disable(): with override_global_config(dict(_dd_api_key="", _llmobs_ml_app="")): dummy_tracer = DummyTracer() @@ -118,7 +93,7 @@ def test_service_enable_no_ml_app_specified(): assert llmobs_service._instance._evaluator_runner.status.value == "stopped" -def test_service_enable_deprecated_ml_app_name(monkeypatch, mock_logs): +def test_service_enable_deprecated_ml_app_name(monkeypatch, mock_llmobs_logs): with override_global_config(dict(_dd_api_key="", _llmobs_ml_app="")): dummy_tracer = DummyTracer() monkeypatch.setenv("DD_LLMOBS_APP_NAME", "test_ml_app") @@ -126,11 +101,13 @@ def test_service_enable_deprecated_ml_app_name(monkeypatch, mock_logs): assert llmobs_service.enabled is True assert llmobs_service._instance._llmobs_eval_metric_writer.status.value == "running" assert llmobs_service._instance._llmobs_span_writer.status.value == "running" - mock_logs.warning.assert_called_once_with("`DD_LLMOBS_APP_NAME` is deprecated. Use `DD_LLMOBS_ML_APP` instead.") + mock_llmobs_logs.warning.assert_called_once_with( + "`DD_LLMOBS_APP_NAME` is deprecated. Use `DD_LLMOBS_ML_APP` instead." + ) llmobs_service.disable() -def test_service_enable_already_enabled(mock_logs): +def test_service_enable_already_enabled(mock_llmobs_logs): with override_global_config(dict(_dd_api_key="", _llmobs_ml_app="")): dummy_tracer = DummyTracer() llmobs_service.enable(_tracer=dummy_tracer) @@ -139,9 +116,8 @@ def test_service_enable_already_enabled(mock_logs): assert llmobs_instance is not None assert llmobs_service.enabled assert llmobs_instance.tracer == dummy_tracer - assert any(isinstance(tracer_filter, LLMObsTraceProcessor) for tracer_filter in dummy_tracer._filters) llmobs_service.disable() - mock_logs.debug.assert_has_calls([mock.call("%s already enabled", "LLMObs")]) + mock_llmobs_logs.debug.assert_has_calls([mock.call("%s already enabled", "LLMObs")]) @mock.patch("ddtrace.llmobs._llmobs.patch") @@ -203,107 +179,83 @@ def test_service_enable_does_not_override_global_patch_config(mock_tracer_patch, llmobs_service.disable() -def test_start_span_while_disabled_logs_warning(LLMObs, mock_logs): - LLMObs.disable() - _ = LLMObs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") - mock_logs.warning.assert_called_once_with(SPAN_START_WHILE_DISABLED_WARNING) - mock_logs.reset_mock() - _ = LLMObs.tool(name="test_tool") - mock_logs.warning.assert_called_once_with(SPAN_START_WHILE_DISABLED_WARNING) - mock_logs.reset_mock() - _ = LLMObs.task(name="test_task") - mock_logs.warning.assert_called_once_with(SPAN_START_WHILE_DISABLED_WARNING) - mock_logs.reset_mock() - _ = LLMObs.workflow(name="test_workflow") - mock_logs.warning.assert_called_once_with(SPAN_START_WHILE_DISABLED_WARNING) - mock_logs.reset_mock() - _ = LLMObs.agent(name="test_agent") - mock_logs.warning.assert_called_once_with(SPAN_START_WHILE_DISABLED_WARNING) - - -def test_start_span_uses_kind_as_default_name(LLMObs): - with LLMObs.llm(model_name="test_model", model_provider="test_provider") as span: +def test_start_span_while_disabled_logs_warning(llmobs, mock_llmobs_logs): + llmobs.disable() + _ = llmobs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") + mock_llmobs_logs.warning.assert_called_once_with(SPAN_START_WHILE_DISABLED_WARNING) + mock_llmobs_logs.reset_mock() + _ = llmobs.tool(name="test_tool") + mock_llmobs_logs.warning.assert_called_once_with(SPAN_START_WHILE_DISABLED_WARNING) + mock_llmobs_logs.reset_mock() + _ = llmobs.task(name="test_task") + mock_llmobs_logs.warning.assert_called_once_with(SPAN_START_WHILE_DISABLED_WARNING) + mock_llmobs_logs.reset_mock() + _ = llmobs.workflow(name="test_workflow") + mock_llmobs_logs.warning.assert_called_once_with(SPAN_START_WHILE_DISABLED_WARNING) + mock_llmobs_logs.reset_mock() + _ = llmobs.agent(name="test_agent") + mock_llmobs_logs.warning.assert_called_once_with(SPAN_START_WHILE_DISABLED_WARNING) + + +def test_start_span_uses_kind_as_default_name(llmobs): + with llmobs.llm(model_name="test_model", model_provider="test_provider") as span: assert span.name == "llm" - with LLMObs.tool() as span: + with llmobs.tool() as span: assert span.name == "tool" - with LLMObs.task() as span: + with llmobs.task() as span: assert span.name == "task" - with LLMObs.workflow() as span: + with llmobs.workflow() as span: assert span.name == "workflow" - with LLMObs.agent() as span: + with llmobs.agent() as span: assert span.name == "agent" -def test_start_span_with_session_id(LLMObs): - with LLMObs.llm(model_name="test_model", session_id="test_session_id") as span: +def test_start_span_with_session_id(llmobs): + with llmobs.llm(model_name="test_model", session_id="test_session_id") as span: assert span._get_ctx_item(SESSION_ID) == "test_session_id" - with LLMObs.tool(session_id="test_session_id") as span: + with llmobs.tool(session_id="test_session_id") as span: assert span._get_ctx_item(SESSION_ID) == "test_session_id" - with LLMObs.task(session_id="test_session_id") as span: + with llmobs.task(session_id="test_session_id") as span: assert span._get_ctx_item(SESSION_ID) == "test_session_id" - with LLMObs.workflow(session_id="test_session_id") as span: + with llmobs.workflow(session_id="test_session_id") as span: assert span._get_ctx_item(SESSION_ID) == "test_session_id" - with LLMObs.agent(session_id="test_session_id") as span: + with llmobs.agent(session_id="test_session_id") as span: assert span._get_ctx_item(SESSION_ID) == "test_session_id" -def test_session_id_becomes_top_level_field(LLMObs, mock_llmobs_span_writer): - session_id = "test_session_id" - with LLMObs.task(session_id=session_id) as span: - pass - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event(span, "task", session_id=session_id) - ) - - -def test_session_id_becomes_top_level_field_agentless(AgentlessLLMObs, mock_llmobs_span_agentless_writer): +def test_session_id_becomes_top_level_field(llmobs, llmobs_events): session_id = "test_session_id" - with AgentlessLLMObs.task(session_id=session_id) as span: + with llmobs.task(session_id=session_id) as span: pass - mock_llmobs_span_agentless_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event(span, "task", session_id=session_id) - ) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event(span, "task", session_id=session_id) -def test_llm_span(LLMObs, mock_llmobs_span_writer): - with LLMObs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: +def test_llm_span(llmobs, llmobs_events): + with llmobs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: assert span.name == "test_llm_call" assert span.resource == "llm" assert span.span_type == "llm" assert span._get_ctx_item(SPAN_KIND) == "llm" assert span._get_ctx_item(MODEL_NAME) == "test_model" assert span._get_ctx_item(MODEL_PROVIDER) == "test_provider" - - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event(span, "llm", model_name="test_model", model_provider="test_provider") - ) - - -def test_llm_span_agentless(AgentlessLLMObs, mock_llmobs_span_agentless_writer): - with AgentlessLLMObs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: - assert span.name == "test_llm_call" - assert span.resource == "llm" - assert span.span_type == "llm" - assert span._get_ctx_item(SPAN_KIND) == "llm" - assert span._get_ctx_item(MODEL_NAME) == "test_model" - assert span._get_ctx_item(MODEL_PROVIDER) == "test_provider" - - mock_llmobs_span_agentless_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event(span, "llm", model_name="test_model", model_provider="test_provider") + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, "llm", model_name="test_model", model_provider="test_provider" ) -def test_llm_span_no_model_sets_default(LLMObs, mock_llmobs_span_writer): - with LLMObs.llm(name="test_llm_call", model_provider="test_provider") as span: +def test_llm_span_no_model_sets_default(llmobs, llmobs_events): + with llmobs.llm(name="test_llm_call", model_provider="test_provider") as span: assert span._get_ctx_item(MODEL_NAME) == "custom" - - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event(span, "llm", model_name="custom", model_provider="test_provider") + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, "llm", model_name="custom", model_provider="test_provider" ) -def test_default_model_provider_set_to_custom(LLMObs): - with LLMObs.llm(model_name="test_model", name="test_llm_call") as span: +def test_default_model_provider_set_to_custom(llmobs): + with llmobs.llm(model_name="test_model", name="test_llm_call") as span: assert span.name == "test_llm_call" assert span.resource == "llm" assert span.span_type == "llm" @@ -312,88 +264,57 @@ def test_default_model_provider_set_to_custom(LLMObs): assert span._get_ctx_item(MODEL_PROVIDER) == "custom" -def test_tool_span(LLMObs, mock_llmobs_span_writer): - with LLMObs.tool(name="test_tool") as span: +def test_tool_span(llmobs, llmobs_events): + with llmobs.tool(name="test_tool") as span: assert span.name == "test_tool" assert span.resource == "tool" assert span.span_type == "llm" assert span._get_ctx_item(SPAN_KIND) == "tool" - mock_llmobs_span_writer.enqueue.assert_called_with(_expected_llmobs_non_llm_span_event(span, "tool")) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event(span, "tool") -def test_tool_span_agentless(AgentlessLLMObs, mock_llmobs_span_agentless_writer): - with AgentlessLLMObs.tool(name="test_tool") as span: - assert span.name == "test_tool" - assert span.resource == "tool" - assert span.span_type == "llm" - assert span._get_ctx_item(SPAN_KIND) == "tool" - mock_llmobs_span_agentless_writer.enqueue.assert_called_with(_expected_llmobs_non_llm_span_event(span, "tool")) - - -def test_task_span(LLMObs, mock_llmobs_span_writer): - with LLMObs.task(name="test_task") as span: - assert span.name == "test_task" - assert span.resource == "task" - assert span.span_type == "llm" - assert span._get_ctx_item(SPAN_KIND) == "task" - mock_llmobs_span_writer.enqueue.assert_called_with(_expected_llmobs_non_llm_span_event(span, "task")) - - -def test_task_span_agentless(AgentlessLLMObs, mock_llmobs_span_agentless_writer): - with AgentlessLLMObs.task(name="test_task") as span: +def test_task_span(llmobs, llmobs_events): + with llmobs.task(name="test_task") as span: assert span.name == "test_task" assert span.resource == "task" assert span.span_type == "llm" assert span._get_ctx_item(SPAN_KIND) == "task" - mock_llmobs_span_agentless_writer.enqueue.assert_called_with(_expected_llmobs_non_llm_span_event(span, "task")) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event(span, "task") -def test_workflow_span(LLMObs, mock_llmobs_span_writer): - with LLMObs.workflow(name="test_workflow") as span: +def test_workflow_span(llmobs, llmobs_events): + with llmobs.workflow(name="test_workflow") as span: assert span.name == "test_workflow" assert span.resource == "workflow" assert span.span_type == "llm" assert span._get_ctx_item(SPAN_KIND) == "workflow" - mock_llmobs_span_writer.enqueue.assert_called_with(_expected_llmobs_non_llm_span_event(span, "workflow")) - - -def test_workflow_span_agentless(AgentlessLLMObs, mock_llmobs_span_agentless_writer): - with AgentlessLLMObs.workflow(name="test_workflow") as span: - assert span.name == "test_workflow" - assert span.resource == "workflow" - assert span.span_type == "llm" - assert span._get_ctx_item(SPAN_KIND) == "workflow" - mock_llmobs_span_agentless_writer.enqueue.assert_called_with(_expected_llmobs_non_llm_span_event(span, "workflow")) - - -def test_agent_span(LLMObs, mock_llmobs_span_writer): - with LLMObs.agent(name="test_agent") as span: - assert span.name == "test_agent" - assert span.resource == "agent" - assert span.span_type == "llm" - assert span._get_ctx_item(SPAN_KIND) == "agent" - mock_llmobs_span_writer.enqueue.assert_called_with(_expected_llmobs_llm_span_event(span, "agent")) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event(span, "workflow") -def test_agent_span_agentless(AgentlessLLMObs, mock_llmobs_span_agentless_writer): - with AgentlessLLMObs.agent(name="test_agent") as span: +def test_agent_span(llmobs, llmobs_events): + with llmobs.agent(name="test_agent") as span: assert span.name == "test_agent" assert span.resource == "agent" assert span.span_type == "llm" assert span._get_ctx_item(SPAN_KIND) == "agent" - mock_llmobs_span_agentless_writer.enqueue.assert_called_with(_expected_llmobs_llm_span_event(span, "agent")) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event(span, "agent") -def test_embedding_span_no_model_sets_default(LLMObs, mock_llmobs_span_writer): - with LLMObs.embedding(name="test_embedding", model_provider="test_provider") as span: +def test_embedding_span_no_model_sets_default(llmobs, llmobs_events): + with llmobs.embedding(name="test_embedding", model_provider="test_provider") as span: assert span._get_ctx_item(MODEL_NAME) == "custom" - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event(span, "embedding", model_name="custom", model_provider="test_provider") + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, "embedding", model_name="custom", model_provider="test_provider" ) -def test_embedding_default_model_provider_set_to_custom(LLMObs): - with LLMObs.embedding(model_name="test_model", name="test_embedding") as span: +def test_embedding_default_model_provider_set_to_custom(llmobs): + with llmobs.embedding(model_name="test_model", name="test_embedding") as span: assert span.name == "test_embedding" assert span.resource == "embedding" assert span.span_type == "llm" @@ -402,198 +323,182 @@ def test_embedding_default_model_provider_set_to_custom(LLMObs): assert span._get_ctx_item(MODEL_PROVIDER) == "custom" -def test_embedding_span(LLMObs, mock_llmobs_span_writer): - with LLMObs.embedding(model_name="test_model", name="test_embedding", model_provider="test_provider") as span: - assert span.name == "test_embedding" - assert span.resource == "embedding" - assert span.span_type == "llm" - assert span._get_ctx_item(SPAN_KIND) == "embedding" - assert span._get_ctx_item(MODEL_NAME) == "test_model" - assert span._get_ctx_item(MODEL_PROVIDER) == "test_provider" - - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event(span, "embedding", model_name="test_model", model_provider="test_provider") - ) - - -def test_embedding_span_agentless(AgentlessLLMObs, mock_llmobs_span_agentless_writer): - with AgentlessLLMObs.embedding( - model_name="test_model", name="test_embedding", model_provider="test_provider" - ) as span: +def test_embedding_span(llmobs, llmobs_events): + with llmobs.embedding(model_name="test_model", name="test_embedding", model_provider="test_provider") as span: assert span.name == "test_embedding" assert span.resource == "embedding" assert span.span_type == "llm" assert span._get_ctx_item(SPAN_KIND) == "embedding" assert span._get_ctx_item(MODEL_NAME) == "test_model" assert span._get_ctx_item(MODEL_PROVIDER) == "test_provider" - - mock_llmobs_span_agentless_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event(span, "embedding", model_name="test_model", model_provider="test_provider") + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, "embedding", model_name="test_model", model_provider="test_provider" ) -def test_annotate_no_active_span_logs_warning(LLMObs, mock_logs): - LLMObs.annotate(parameters={"test": "test"}) - mock_logs.warning.assert_called_once_with("No span provided and no active LLMObs-generated span found.") +def test_annotate_no_active_span_logs_warning(llmobs, mock_llmobs_logs): + llmobs.annotate(parameters={"test": "test"}) + mock_llmobs_logs.warning.assert_called_once_with("No span provided and no active LLMObs-generated span found.") -def test_annotate_non_llm_span_logs_warning(LLMObs, mock_logs): +def test_annotate_non_llm_span_logs_warning(llmobs, mock_llmobs_logs): dummy_tracer = DummyTracer() with dummy_tracer.trace("root") as non_llmobs_span: - LLMObs.annotate(span=non_llmobs_span, parameters={"test": "test"}) - mock_logs.warning.assert_called_once_with("Span must be an LLMObs-generated span.") + llmobs.annotate(span=non_llmobs_span, parameters={"test": "test"}) + mock_llmobs_logs.warning.assert_called_once_with("Span must be an LLMObs-generated span.") -def test_annotate_finished_span_does_nothing(LLMObs, mock_logs): - with LLMObs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: +def test_annotate_finished_span_does_nothing(llmobs, mock_llmobs_logs): + with llmobs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: pass - LLMObs.annotate(span=span, parameters={"test": "test"}) - mock_logs.warning.assert_called_once_with("Cannot annotate a finished span.") + llmobs.annotate(span=span, parameters={"test": "test"}) + mock_llmobs_logs.warning.assert_called_once_with("Cannot annotate a finished span.") -def test_annotate_parameters(LLMObs, mock_logs): - with LLMObs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: - LLMObs.annotate(span=span, parameters={"temperature": 0.9, "max_tokens": 50}) +def test_annotate_parameters(llmobs, mock_llmobs_logs): + with llmobs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: + llmobs.annotate(span=span, parameters={"temperature": 0.9, "max_tokens": 50}) assert span._get_ctx_item(INPUT_PARAMETERS) == {"temperature": 0.9, "max_tokens": 50} - mock_logs.warning.assert_called_once_with( + mock_llmobs_logs.warning.assert_called_once_with( "Setting parameters is deprecated, please set parameters and other metadata as tags instead." ) -def test_annotate_metadata(LLMObs): - with LLMObs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: - LLMObs.annotate(span=span, metadata={"temperature": 0.5, "max_tokens": 20, "top_k": 10, "n": 3}) +def test_annotate_metadata(llmobs): + with llmobs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: + llmobs.annotate(span=span, metadata={"temperature": 0.5, "max_tokens": 20, "top_k": 10, "n": 3}) assert span._get_ctx_item(METADATA) == {"temperature": 0.5, "max_tokens": 20, "top_k": 10, "n": 3} -def test_annotate_metadata_wrong_type_raises_warning(LLMObs, mock_logs): - with LLMObs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: - LLMObs.annotate(span=span, metadata="wrong_metadata") +def test_annotate_metadata_wrong_type_raises_warning(llmobs, mock_llmobs_logs): + with llmobs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: + llmobs.annotate(span=span, metadata="wrong_metadata") assert span._get_ctx_item(METADATA) is None - mock_logs.warning.assert_called_once_with("metadata must be a dictionary of string key-value pairs.") - mock_logs.reset_mock() + mock_llmobs_logs.warning.assert_called_once_with("metadata must be a dictionary of string key-value pairs.") + mock_llmobs_logs.reset_mock() -def test_annotate_tag(LLMObs): - with LLMObs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: - LLMObs.annotate(span=span, tags={"test_tag_name": "test_tag_value", "test_numeric_tag": 10}) +def test_annotate_tag(llmobs): + with llmobs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: + llmobs.annotate(span=span, tags={"test_tag_name": "test_tag_value", "test_numeric_tag": 10}) assert span._get_ctx_item(TAGS) == {"test_tag_name": "test_tag_value", "test_numeric_tag": 10} -def test_annotate_tag_wrong_type(LLMObs, mock_logs): - with LLMObs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: - LLMObs.annotate(span=span, tags=12345) +def test_annotate_tag_wrong_type(llmobs, mock_llmobs_logs): + with llmobs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: + llmobs.annotate(span=span, tags=12345) assert span._get_ctx_item(TAGS) is None - mock_logs.warning.assert_called_once_with( + mock_llmobs_logs.warning.assert_called_once_with( "span_tags must be a dictionary of string key - primitive value pairs." ) -def test_annotate_input_string(LLMObs): - with LLMObs.llm(model_name="test_model") as llm_span: - LLMObs.annotate(span=llm_span, input_data="test_input") +def test_annotate_input_string(llmobs): + with llmobs.llm(model_name="test_model") as llm_span: + llmobs.annotate(span=llm_span, input_data="test_input") assert llm_span._get_ctx_item(INPUT_MESSAGES) == [{"content": "test_input"}] - with LLMObs.task() as task_span: - LLMObs.annotate(span=task_span, input_data="test_input") + with llmobs.task() as task_span: + llmobs.annotate(span=task_span, input_data="test_input") assert task_span._get_ctx_item(INPUT_VALUE) == "test_input" - with LLMObs.tool() as tool_span: - LLMObs.annotate(span=tool_span, input_data="test_input") + with llmobs.tool() as tool_span: + llmobs.annotate(span=tool_span, input_data="test_input") assert tool_span._get_ctx_item(INPUT_VALUE) == "test_input" - with LLMObs.workflow() as workflow_span: - LLMObs.annotate(span=workflow_span, input_data="test_input") + with llmobs.workflow() as workflow_span: + llmobs.annotate(span=workflow_span, input_data="test_input") assert workflow_span._get_ctx_item(INPUT_VALUE) == "test_input" - with LLMObs.agent() as agent_span: - LLMObs.annotate(span=agent_span, input_data="test_input") + with llmobs.agent() as agent_span: + llmobs.annotate(span=agent_span, input_data="test_input") assert agent_span._get_ctx_item(INPUT_VALUE) == "test_input" - with LLMObs.retrieval() as retrieval_span: - LLMObs.annotate(span=retrieval_span, input_data="test_input") + with llmobs.retrieval() as retrieval_span: + llmobs.annotate(span=retrieval_span, input_data="test_input") assert retrieval_span._get_ctx_item(INPUT_VALUE) == "test_input" -def test_annotate_numeric_io(LLMObs): - with LLMObs.task() as task_span: - LLMObs.annotate(span=task_span, input_data=0, output_data=0) +def test_annotate_numeric_io(llmobs): + with llmobs.task() as task_span: + llmobs.annotate(span=task_span, input_data=0, output_data=0) assert task_span._get_ctx_item(INPUT_VALUE) == "0" assert task_span._get_ctx_item(OUTPUT_VALUE) == "0" - with LLMObs.task() as task_span: - LLMObs.annotate(span=task_span, input_data=1.23, output_data=1.23) + with llmobs.task() as task_span: + llmobs.annotate(span=task_span, input_data=1.23, output_data=1.23) assert task_span._get_ctx_item(INPUT_VALUE) == "1.23" assert task_span._get_ctx_item(OUTPUT_VALUE) == "1.23" -def test_annotate_input_serializable_value(LLMObs): - with LLMObs.task() as task_span: - LLMObs.annotate(span=task_span, input_data=["test_input"]) +def test_annotate_input_serializable_value(llmobs): + with llmobs.task() as task_span: + llmobs.annotate(span=task_span, input_data=["test_input"]) assert task_span._get_ctx_item(INPUT_VALUE) == str(["test_input"]) - with LLMObs.tool() as tool_span: - LLMObs.annotate(span=tool_span, input_data={"test_input": "hello world"}) + with llmobs.tool() as tool_span: + llmobs.annotate(span=tool_span, input_data={"test_input": "hello world"}) assert tool_span._get_ctx_item(INPUT_VALUE) == str({"test_input": "hello world"}) - with LLMObs.workflow() as workflow_span: - LLMObs.annotate(span=workflow_span, input_data=("asd", 123)) + with llmobs.workflow() as workflow_span: + llmobs.annotate(span=workflow_span, input_data=("asd", 123)) assert workflow_span._get_ctx_item(INPUT_VALUE) == str(("asd", 123)) - with LLMObs.agent() as agent_span: - LLMObs.annotate(span=agent_span, input_data="test_input") + with llmobs.agent() as agent_span: + llmobs.annotate(span=agent_span, input_data="test_input") assert agent_span._get_ctx_item(INPUT_VALUE) == "test_input" - with LLMObs.retrieval() as retrieval_span: - LLMObs.annotate(span=retrieval_span, input_data=[0, 1, 2, 3, 4]) + with llmobs.retrieval() as retrieval_span: + llmobs.annotate(span=retrieval_span, input_data=[0, 1, 2, 3, 4]) assert retrieval_span._get_ctx_item(INPUT_VALUE) == str([0, 1, 2, 3, 4]) -def test_annotate_input_llm_message(LLMObs): - with LLMObs.llm(model_name="test_model") as span: - LLMObs.annotate(span=span, input_data=[{"content": "test_input", "role": "human"}]) +def test_annotate_input_llm_message(llmobs): + with llmobs.llm(model_name="test_model") as span: + llmobs.annotate(span=span, input_data=[{"content": "test_input", "role": "human"}]) assert span._get_ctx_item(INPUT_MESSAGES) == [{"content": "test_input", "role": "human"}] -def test_annotate_input_llm_message_wrong_type(LLMObs, mock_logs): - with LLMObs.llm(model_name="test_model") as span: - LLMObs.annotate(span=span, input_data=[{"content": object()}]) +def test_annotate_input_llm_message_wrong_type(llmobs, mock_llmobs_logs): + with llmobs.llm(model_name="test_model") as span: + llmobs.annotate(span=span, input_data=[{"content": object()}]) assert span._get_ctx_item(INPUT_MESSAGES) is None - mock_logs.warning.assert_called_once_with("Failed to parse input messages.", exc_info=True) + mock_llmobs_logs.warning.assert_called_once_with("Failed to parse input messages.", exc_info=True) -def test_llmobs_annotate_incorrect_message_content_type_raises_warning(LLMObs, mock_logs): - with LLMObs.llm(model_name="test_model") as span: - LLMObs.annotate(span=span, input_data={"role": "user", "content": {"nested": "yes"}}) - mock_logs.warning.assert_called_once_with("Failed to parse input messages.", exc_info=True) - mock_logs.reset_mock() - LLMObs.annotate(span=span, output_data={"role": "user", "content": {"nested": "yes"}}) - mock_logs.warning.assert_called_once_with("Failed to parse output messages.", exc_info=True) +def test_llmobs_annotate_incorrect_message_content_type_raises_warning(llmobs, mock_llmobs_logs): + with llmobs.llm(model_name="test_model") as span: + llmobs.annotate(span=span, input_data={"role": "user", "content": {"nested": "yes"}}) + mock_llmobs_logs.warning.assert_called_once_with("Failed to parse input messages.", exc_info=True) + mock_llmobs_logs.reset_mock() + llmobs.annotate(span=span, output_data={"role": "user", "content": {"nested": "yes"}}) + mock_llmobs_logs.warning.assert_called_once_with("Failed to parse output messages.", exc_info=True) -def test_annotate_document_str(LLMObs): - with LLMObs.embedding(model_name="test_model") as span: - LLMObs.annotate(span=span, input_data="test_document_text") +def test_annotate_document_str(llmobs): + with llmobs.embedding(model_name="test_model") as span: + llmobs.annotate(span=span, input_data="test_document_text") documents = span._get_ctx_item(INPUT_DOCUMENTS) assert documents assert len(documents) == 1 assert documents[0]["text"] == "test_document_text" - with LLMObs.retrieval() as span: - LLMObs.annotate(span=span, output_data="test_document_text") + with llmobs.retrieval() as span: + llmobs.annotate(span=span, output_data="test_document_text") documents = span._get_ctx_item(OUTPUT_DOCUMENTS) assert documents assert len(documents) == 1 assert documents[0]["text"] == "test_document_text" -def test_annotate_document_dict(LLMObs): - with LLMObs.embedding(model_name="test_model") as span: - LLMObs.annotate(span=span, input_data={"text": "test_document_text"}) +def test_annotate_document_dict(llmobs): + with llmobs.embedding(model_name="test_model") as span: + llmobs.annotate(span=span, input_data={"text": "test_document_text"}) documents = span._get_ctx_item(INPUT_DOCUMENTS) assert documents assert len(documents) == 1 assert documents[0]["text"] == "test_document_text" - with LLMObs.retrieval() as span: - LLMObs.annotate(span=span, output_data={"text": "test_document_text"}) + with llmobs.retrieval() as span: + llmobs.annotate(span=span, output_data={"text": "test_document_text"}) documents = span._get_ctx_item(OUTPUT_DOCUMENTS) assert documents assert len(documents) == 1 assert documents[0]["text"] == "test_document_text" -def test_annotate_document_list(LLMObs): - with LLMObs.embedding(model_name="test_model") as span: - LLMObs.annotate( +def test_annotate_document_list(llmobs): + with llmobs.embedding(model_name="test_model") as span: + llmobs.annotate( span=span, input_data=[{"text": "test_document_text"}, {"text": "text", "name": "name", "score": 0.9, "id": "id"}], ) @@ -605,8 +510,8 @@ def test_annotate_document_list(LLMObs): assert documents[1]["name"] == "name" assert documents[1]["id"] == "id" assert documents[1]["score"] == 0.9 - with LLMObs.retrieval() as span: - LLMObs.annotate( + with llmobs.retrieval() as span: + llmobs.annotate( span=span, output_data=[{"text": "test_document_text"}, {"text": "text", "name": "name", "score": 0.9, "id": "id"}], ) @@ -620,129 +525,131 @@ def test_annotate_document_list(LLMObs): assert documents[1]["score"] == 0.9 -def test_annotate_incorrect_document_type_raises_warning(LLMObs, mock_logs): - with LLMObs.embedding(model_name="test_model") as span: - LLMObs.annotate(span=span, input_data={"text": 123}) - mock_logs.warning.assert_called_once_with("Failed to parse input documents.", exc_info=True) - mock_logs.reset_mock() - LLMObs.annotate(span=span, input_data=123) - mock_logs.warning.assert_called_once_with("Failed to parse input documents.", exc_info=True) - mock_logs.reset_mock() - LLMObs.annotate(span=span, input_data=object()) - mock_logs.warning.assert_called_once_with("Failed to parse input documents.", exc_info=True) - mock_logs.reset_mock() - with LLMObs.retrieval() as span: - LLMObs.annotate(span=span, output_data=[{"score": 0.9, "id": "id", "name": "name"}]) - mock_logs.warning.assert_called_once_with("Failed to parse output documents.", exc_info=True) - mock_logs.reset_mock() - LLMObs.annotate(span=span, output_data=123) - mock_logs.warning.assert_called_once_with("Failed to parse output documents.", exc_info=True) - mock_logs.reset_mock() - LLMObs.annotate(span=span, output_data=object()) - mock_logs.warning.assert_called_once_with("Failed to parse output documents.", exc_info=True) - - -def test_annotate_document_no_text_raises_warning(LLMObs, mock_logs): - with LLMObs.embedding(model_name="test_model") as span: - LLMObs.annotate(span=span, input_data=[{"score": 0.9, "id": "id", "name": "name"}]) - mock_logs.warning.assert_called_once_with("Failed to parse input documents.", exc_info=True) - mock_logs.reset_mock() - with LLMObs.retrieval() as span: - LLMObs.annotate(span=span, output_data=[{"score": 0.9, "id": "id", "name": "name"}]) - mock_logs.warning.assert_called_once_with("Failed to parse output documents.", exc_info=True) - - -def test_annotate_incorrect_document_field_type_raises_warning(LLMObs, mock_logs): - with LLMObs.embedding(model_name="test_model") as span: - LLMObs.annotate(span=span, input_data=[{"text": "test_document_text", "score": "0.9"}]) - mock_logs.warning.assert_called_once_with("Failed to parse input documents.", exc_info=True) - mock_logs.reset_mock() - with LLMObs.embedding(model_name="test_model") as span: - LLMObs.annotate( +def test_annotate_incorrect_document_type_raises_warning(llmobs, mock_llmobs_logs): + with llmobs.embedding(model_name="test_model") as span: + llmobs.annotate(span=span, input_data={"text": 123}) + mock_llmobs_logs.warning.assert_called_once_with("Failed to parse input documents.", exc_info=True) + mock_llmobs_logs.reset_mock() + llmobs.annotate(span=span, input_data=123) + mock_llmobs_logs.warning.assert_called_once_with("Failed to parse input documents.", exc_info=True) + mock_llmobs_logs.reset_mock() + llmobs.annotate(span=span, input_data=object()) + mock_llmobs_logs.warning.assert_called_once_with("Failed to parse input documents.", exc_info=True) + mock_llmobs_logs.reset_mock() + with llmobs.retrieval() as span: + llmobs.annotate(span=span, output_data=[{"score": 0.9, "id": "id", "name": "name"}]) + mock_llmobs_logs.warning.assert_called_once_with("Failed to parse output documents.", exc_info=True) + mock_llmobs_logs.reset_mock() + llmobs.annotate(span=span, output_data=123) + mock_llmobs_logs.warning.assert_called_once_with("Failed to parse output documents.", exc_info=True) + mock_llmobs_logs.reset_mock() + llmobs.annotate(span=span, output_data=object()) + mock_llmobs_logs.warning.assert_called_once_with("Failed to parse output documents.", exc_info=True) + + +def test_annotate_document_no_text_raises_warning(llmobs, mock_llmobs_logs): + with llmobs.embedding(model_name="test_model") as span: + llmobs.annotate(span=span, input_data=[{"score": 0.9, "id": "id", "name": "name"}]) + mock_llmobs_logs.warning.assert_called_once_with("Failed to parse input documents.", exc_info=True) + mock_llmobs_logs.reset_mock() + with llmobs.retrieval() as span: + llmobs.annotate(span=span, output_data=[{"score": 0.9, "id": "id", "name": "name"}]) + mock_llmobs_logs.warning.assert_called_once_with("Failed to parse output documents.", exc_info=True) + + +def test_annotate_incorrect_document_field_type_raises_warning(llmobs, mock_llmobs_logs): + with llmobs.embedding(model_name="test_model") as span: + llmobs.annotate(span=span, input_data=[{"text": "test_document_text", "score": "0.9"}]) + mock_llmobs_logs.warning.assert_called_once_with("Failed to parse input documents.", exc_info=True) + mock_llmobs_logs.reset_mock() + with llmobs.embedding(model_name="test_model") as span: + llmobs.annotate( span=span, input_data=[{"text": "text", "id": 123, "score": "0.9", "name": ["h", "e", "l", "l", "o"]}] ) - mock_logs.warning.assert_called_once_with("Failed to parse input documents.", exc_info=True) - mock_logs.reset_mock() - with LLMObs.retrieval() as span: - LLMObs.annotate(span=span, output_data=[{"text": "test_document_text", "score": "0.9"}]) - mock_logs.warning.assert_called_once_with("Failed to parse output documents.", exc_info=True) - mock_logs.reset_mock() - with LLMObs.retrieval() as span: - LLMObs.annotate( + mock_llmobs_logs.warning.assert_called_once_with("Failed to parse input documents.", exc_info=True) + mock_llmobs_logs.reset_mock() + with llmobs.retrieval() as span: + llmobs.annotate(span=span, output_data=[{"text": "test_document_text", "score": "0.9"}]) + mock_llmobs_logs.warning.assert_called_once_with("Failed to parse output documents.", exc_info=True) + mock_llmobs_logs.reset_mock() + with llmobs.retrieval() as span: + llmobs.annotate( span=span, output_data=[{"text": "text", "id": 123, "score": "0.9", "name": ["h", "e", "l", "l", "o"]}] ) - mock_logs.warning.assert_called_once_with("Failed to parse output documents.", exc_info=True) + mock_llmobs_logs.warning.assert_called_once_with("Failed to parse output documents.", exc_info=True) -def test_annotate_output_string(LLMObs): - with LLMObs.llm(model_name="test_model") as llm_span: - LLMObs.annotate(span=llm_span, output_data="test_output") +def test_annotate_output_string(llmobs): + with llmobs.llm(model_name="test_model") as llm_span: + llmobs.annotate(span=llm_span, output_data="test_output") assert llm_span._get_ctx_item(OUTPUT_MESSAGES) == [{"content": "test_output"}] - with LLMObs.embedding(model_name="test_model") as embedding_span: - LLMObs.annotate(span=embedding_span, output_data="test_output") + with llmobs.embedding(model_name="test_model") as embedding_span: + llmobs.annotate(span=embedding_span, output_data="test_output") assert embedding_span._get_ctx_item(OUTPUT_VALUE) == "test_output" - with LLMObs.task() as task_span: - LLMObs.annotate(span=task_span, output_data="test_output") + with llmobs.task() as task_span: + llmobs.annotate(span=task_span, output_data="test_output") assert task_span._get_ctx_item(OUTPUT_VALUE) == "test_output" - with LLMObs.tool() as tool_span: - LLMObs.annotate(span=tool_span, output_data="test_output") + with llmobs.tool() as tool_span: + llmobs.annotate(span=tool_span, output_data="test_output") assert tool_span._get_ctx_item(OUTPUT_VALUE) == "test_output" - with LLMObs.workflow() as workflow_span: - LLMObs.annotate(span=workflow_span, output_data="test_output") + with llmobs.workflow() as workflow_span: + llmobs.annotate(span=workflow_span, output_data="test_output") assert workflow_span._get_ctx_item(OUTPUT_VALUE) == "test_output" - with LLMObs.agent() as agent_span: - LLMObs.annotate(span=agent_span, output_data="test_output") + with llmobs.agent() as agent_span: + llmobs.annotate(span=agent_span, output_data="test_output") assert agent_span._get_ctx_item(OUTPUT_VALUE) == "test_output" -def test_annotate_output_serializable_value(LLMObs): - with LLMObs.embedding(model_name="test_model") as embedding_span: - LLMObs.annotate(span=embedding_span, output_data=[[0, 1, 2, 3], [4, 5, 6, 7]]) +def test_annotate_output_serializable_value(llmobs): + with llmobs.embedding(model_name="test_model") as embedding_span: + llmobs.annotate(span=embedding_span, output_data=[[0, 1, 2, 3], [4, 5, 6, 7]]) assert embedding_span._get_ctx_item(OUTPUT_VALUE) == str([[0, 1, 2, 3], [4, 5, 6, 7]]) - with LLMObs.task() as task_span: - LLMObs.annotate(span=task_span, output_data=["test_output"]) + with llmobs.task() as task_span: + llmobs.annotate(span=task_span, output_data=["test_output"]) assert task_span._get_ctx_item(OUTPUT_VALUE) == str(["test_output"]) - with LLMObs.tool() as tool_span: - LLMObs.annotate(span=tool_span, output_data={"test_output": "hello world"}) + with llmobs.tool() as tool_span: + llmobs.annotate(span=tool_span, output_data={"test_output": "hello world"}) assert tool_span._get_ctx_item(OUTPUT_VALUE) == str({"test_output": "hello world"}) - with LLMObs.workflow() as workflow_span: - LLMObs.annotate(span=workflow_span, output_data=("asd", 123)) + with llmobs.workflow() as workflow_span: + llmobs.annotate(span=workflow_span, output_data=("asd", 123)) assert workflow_span._get_ctx_item(OUTPUT_VALUE) == str(("asd", 123)) - with LLMObs.agent() as agent_span: - LLMObs.annotate(span=agent_span, output_data="test_output") + with llmobs.agent() as agent_span: + llmobs.annotate(span=agent_span, output_data="test_output") assert agent_span._get_ctx_item(OUTPUT_VALUE) == "test_output" -def test_annotate_output_llm_message(LLMObs): - with LLMObs.llm(model_name="test_model") as llm_span: - LLMObs.annotate(span=llm_span, output_data=[{"content": "test_output", "role": "human"}]) +def test_annotate_output_llm_message(llmobs): + with llmobs.llm(model_name="test_model") as llm_span: + llmobs.annotate(span=llm_span, output_data=[{"content": "test_output", "role": "human"}]) assert llm_span._get_ctx_item(OUTPUT_MESSAGES) == [{"content": "test_output", "role": "human"}] -def test_annotate_output_llm_message_wrong_type(LLMObs, mock_logs): - with LLMObs.llm(model_name="test_model") as llm_span: - LLMObs.annotate(span=llm_span, output_data=[{"content": object()}]) +def test_annotate_output_llm_message_wrong_type(llmobs, mock_llmobs_logs): + with llmobs.llm(model_name="test_model") as llm_span: + llmobs.annotate(span=llm_span, output_data=[{"content": object()}]) assert llm_span._get_ctx_item(OUTPUT_MESSAGES) is None - mock_logs.warning.assert_called_once_with("Failed to parse output messages.", exc_info=True) + mock_llmobs_logs.warning.assert_called_once_with("Failed to parse output messages.", exc_info=True) -def test_annotate_metrics(LLMObs): - with LLMObs.llm(model_name="test_model") as span: - LLMObs.annotate(span=span, metrics={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30}) +def test_annotate_metrics(llmobs): + with llmobs.llm(model_name="test_model") as span: + llmobs.annotate(span=span, metrics={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30}) assert span._get_ctx_item(METRICS) == {"input_tokens": 10, "output_tokens": 20, "total_tokens": 30} -def test_annotate_metrics_wrong_type(LLMObs, mock_logs): - with LLMObs.llm(model_name="test_model") as llm_span: - LLMObs.annotate(span=llm_span, metrics=12345) +def test_annotate_metrics_wrong_type(llmobs, mock_llmobs_logs): + with llmobs.llm(model_name="test_model") as llm_span: + llmobs.annotate(span=llm_span, metrics=12345) assert llm_span._get_ctx_item(METRICS) is None - mock_logs.warning.assert_called_once_with("metrics must be a dictionary of string key - numeric value pairs.") - mock_logs.reset_mock() + mock_llmobs_logs.warning.assert_called_once_with( + "metrics must be a dictionary of string key - numeric value pairs." + ) + mock_llmobs_logs.reset_mock() -def test_annotate_prompt_dict(LLMObs): - with LLMObs.llm(model_name="test_model") as span: - LLMObs.annotate( +def test_annotate_prompt_dict(llmobs): + with llmobs.llm(model_name="test_model") as span: + llmobs.annotate( span=span, prompt={ "template": "{var1} {var3}", @@ -761,9 +668,9 @@ def test_annotate_prompt_dict(LLMObs): } -def test_annotate_prompt_dict_with_context_var_keys(LLMObs): - with LLMObs.llm(model_name="test_model") as span: - LLMObs.annotate( +def test_annotate_prompt_dict_with_context_var_keys(llmobs): + with llmobs.llm(model_name="test_model") as span: + llmobs.annotate( span=span, prompt={ "template": "{var1} {var3}", @@ -784,9 +691,9 @@ def test_annotate_prompt_dict_with_context_var_keys(LLMObs): } -def test_annotate_prompt_typed_dict(LLMObs): - with LLMObs.llm(model_name="test_model") as span: - LLMObs.annotate( +def test_annotate_prompt_typed_dict(llmobs): + with llmobs.llm(model_name="test_model") as span: + llmobs.annotate( span=span, prompt=Prompt( template="{var1} {var3}", @@ -807,47 +714,30 @@ def test_annotate_prompt_typed_dict(LLMObs): } -def test_annotate_prompt_wrong_type(LLMObs, mock_logs): - with LLMObs.llm(model_name="test_model") as span: - LLMObs.annotate(span=span, prompt="prompt") +def test_annotate_prompt_wrong_type(llmobs, mock_llmobs_logs): + with llmobs.llm(model_name="test_model") as span: + llmobs.annotate(span=span, prompt="prompt") assert span._get_ctx_item(INPUT_PROMPT) is None - mock_logs.warning.assert_called_once_with("Failed to validate prompt with error: ", exc_info=True) - mock_logs.reset_mock() + mock_llmobs_logs.warning.assert_called_once_with("Failed to validate prompt with error: ", exc_info=True) + mock_llmobs_logs.reset_mock() - LLMObs.annotate(span=span, prompt={"template": 1}) - mock_logs.warning.assert_called_once_with("Failed to validate prompt with error: ", exc_info=True) - mock_logs.reset_mock() + llmobs.annotate(span=span, prompt={"template": 1}) + mock_llmobs_logs.warning.assert_called_once_with("Failed to validate prompt with error: ", exc_info=True) + mock_llmobs_logs.reset_mock() -def test_span_error_sets_error(LLMObs, mock_llmobs_span_writer): +def test_span_error_sets_error(llmobs, llmobs_events): with pytest.raises(ValueError): - with LLMObs.llm(model_name="test_model", model_provider="test_model_provider") as span: + with llmobs.llm(model_name="test_model", model_provider="test_model_provider") as span: raise ValueError("test error message") - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, - model_name="test_model", - model_provider="test_model_provider", - error="builtins.ValueError", - error_message="test error message", - error_stack=span.get_tag("error.stack"), - ) - ) - - -def test_span_error_sets_error_agentless(AgentlessLLMObs, mock_llmobs_span_agentless_writer): - with pytest.raises(ValueError): - with AgentlessLLMObs.llm(model_name="test_model", model_provider="test_model_provider") as span: - raise ValueError("test error message") - mock_llmobs_span_agentless_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, - model_name="test_model", - model_provider="test_model_provider", - error="builtins.ValueError", - error_message="test error message", - error_stack=span.get_tag("error.stack"), - ) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="test_model", + model_provider="test_model_provider", + error="builtins.ValueError", + error_message="test error message", + error_stack=span.get_tag("error.stack"), ) @@ -855,218 +745,152 @@ def test_span_error_sets_error_agentless(AgentlessLLMObs, mock_llmobs_span_agent "ddtrace_global_config", [dict(version="1.2.3", env="test_env", service="test_service", _llmobs_ml_app="test_app_name")], ) -def test_tags(ddtrace_global_config, LLMObs, mock_llmobs_span_writer, monkeypatch): - with LLMObs.task(name="test_task") as span: +def test_tags(ddtrace_global_config, llmobs, llmobs_events, monkeypatch): + with llmobs.task(name="test_task") as span: pass - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event( - span, - "task", - tags={"version": "1.2.3", "env": "test_env", "service": "test_service", "ml_app": "test_app_name"}, - ) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event( + span, + "task", + tags={"version": "1.2.3", "env": "test_env", "service": "test_service", "ml_app": "test_app_name"}, ) -@pytest.mark.parametrize( - "ddtrace_global_config", - [dict(version="1.2.3", env="test_env", service="test_service", _llmobs_ml_app="test_app_name")], -) -def test_tags_agentless(ddtrace_global_config, AgentlessLLMObs, mock_llmobs_span_agentless_writer, monkeypatch): - with AgentlessLLMObs.task(name="test_task") as span: +def test_ml_app_override(llmobs, llmobs_events): + with llmobs.task(name="test_task", ml_app="test_app") as span: pass - mock_llmobs_span_agentless_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event( - span, - "task", - tags={"version": "1.2.3", "env": "test_env", "service": "test_service", "ml_app": "test_app_name"}, - ) - ) - - -def test_ml_app_override(LLMObs, mock_llmobs_span_writer): - with LLMObs.task(name="test_task", ml_app="test_app") as span: + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event(span, "task", tags={"ml_app": "test_app"}) + with llmobs.tool(name="test_tool", ml_app="test_app") as span: pass - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event(span, "task", tags={"ml_app": "test_app"}) - ) - with LLMObs.tool(name="test_tool", ml_app="test_app") as span: + assert len(llmobs_events) == 2 + assert llmobs_events[1] == _expected_llmobs_non_llm_span_event(span, "tool", tags={"ml_app": "test_app"}) + with llmobs.llm(model_name="model_name", name="test_llm", ml_app="test_app") as span: pass - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event(span, "tool", tags={"ml_app": "test_app"}) + assert len(llmobs_events) == 3 + assert llmobs_events[2] == _expected_llmobs_llm_span_event( + span, "llm", model_name="model_name", model_provider="custom", tags={"ml_app": "test_app"} ) - with LLMObs.llm(model_name="model_name", name="test_llm", ml_app="test_app") as span: + with llmobs.embedding(model_name="model_name", name="test_embedding", ml_app="test_app") as span: pass - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, "llm", model_name="model_name", model_provider="custom", tags={"ml_app": "test_app"} - ) - ) - with LLMObs.embedding(model_name="model_name", name="test_embedding", ml_app="test_app") as span: - pass - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, "embedding", model_name="model_name", model_provider="custom", tags={"ml_app": "test_app"} - ) + assert len(llmobs_events) == 4 + assert llmobs_events[3] == _expected_llmobs_llm_span_event( + span, "embedding", model_name="model_name", model_provider="custom", tags={"ml_app": "test_app"} ) - with LLMObs.workflow(name="test_workflow", ml_app="test_app") as span: + with llmobs.workflow(name="test_workflow", ml_app="test_app") as span: pass - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event(span, "workflow", tags={"ml_app": "test_app"}) - ) - with LLMObs.agent(name="test_agent", ml_app="test_app") as span: + assert len(llmobs_events) == 5 + assert llmobs_events[4] == _expected_llmobs_non_llm_span_event(span, "workflow", tags={"ml_app": "test_app"}) + with llmobs.agent(name="test_agent", ml_app="test_app") as span: pass - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event(span, "agent", tags={"ml_app": "test_app"}) - ) - with LLMObs.retrieval(name="test_retrieval", ml_app="test_app") as span: + assert len(llmobs_events) == 6 + assert llmobs_events[5] == _expected_llmobs_llm_span_event(span, "agent", tags={"ml_app": "test_app"}) + with llmobs.retrieval(name="test_retrieval", ml_app="test_app") as span: pass - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event(span, "retrieval", tags={"ml_app": "test_app"}) - ) + assert len(llmobs_events) == 7 + assert llmobs_events[6] == _expected_llmobs_non_llm_span_event(span, "retrieval", tags={"ml_app": "test_app"}) -def test_ml_app_override_agentless(AgentlessLLMObs, mock_llmobs_span_agentless_writer): - with AgentlessLLMObs.task(name="test_task", ml_app="test_app") as span: - pass - mock_llmobs_span_agentless_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event(span, "task", tags={"ml_app": "test_app"}) - ) - with AgentlessLLMObs.tool(name="test_tool", ml_app="test_app") as span: - pass - mock_llmobs_span_agentless_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event(span, "tool", tags={"ml_app": "test_app"}) - ) - with AgentlessLLMObs.llm(model_name="model_name", name="test_llm", ml_app="test_app") as span: - pass - mock_llmobs_span_agentless_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, "llm", model_name="model_name", model_provider="custom", tags={"ml_app": "test_app"} - ) - ) - with AgentlessLLMObs.embedding(model_name="model_name", name="test_embedding", ml_app="test_app") as span: - pass - mock_llmobs_span_agentless_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, "embedding", model_name="model_name", model_provider="custom", tags={"ml_app": "test_app"} - ) - ) - with AgentlessLLMObs.workflow(name="test_workflow", ml_app="test_app") as span: - pass - mock_llmobs_span_agentless_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event(span, "workflow", tags={"ml_app": "test_app"}) - ) - with AgentlessLLMObs.agent(name="test_agent", ml_app="test_app") as span: - pass - mock_llmobs_span_agentless_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event(span, "agent", tags={"ml_app": "test_app"}) - ) - with AgentlessLLMObs.retrieval(name="test_retrieval", ml_app="test_app") as span: - pass - mock_llmobs_span_agentless_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event(span, "retrieval", tags={"ml_app": "test_app"}) - ) - +def test_export_span_specified_span_is_incorrect_type_raises_warning(llmobs, mock_llmobs_logs): + llmobs.export_span(span="asd") + mock_llmobs_logs.warning.assert_called_once_with("Failed to export span. Span must be a valid Span object.") -def test_export_span_specified_span_is_incorrect_type_raises_warning(LLMObs, mock_logs): - LLMObs.export_span(span="asd") - mock_logs.warning.assert_called_once_with("Failed to export span. Span must be a valid Span object.") - -def test_export_span_specified_span_is_not_llmobs_span_raises_warning(LLMObs, mock_logs): +def test_export_span_specified_span_is_not_llmobs_span_raises_warning(llmobs, mock_llmobs_logs): with DummyTracer().trace("non_llmobs_span") as span: - LLMObs.export_span(span=span) - mock_logs.warning.assert_called_once_with("Span must be an LLMObs-generated span.") + llmobs.export_span(span=span) + mock_llmobs_logs.warning.assert_called_once_with("Span must be an LLMObs-generated span.") -def test_export_span_specified_span_returns_span_context(LLMObs): - with LLMObs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: - span_context = LLMObs.export_span(span=span) +def test_export_span_specified_span_returns_span_context(llmobs): + with llmobs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: + span_context = llmobs.export_span(span=span) assert span_context is not None assert span_context["span_id"] == str(span.span_id) assert span_context["trace_id"] == "{:x}".format(span.trace_id) -def test_export_span_no_specified_span_no_active_span_raises_warning(LLMObs, mock_logs): - LLMObs.export_span() - mock_logs.warning.assert_called_once_with("No span provided and no active LLMObs-generated span found.") +def test_export_span_no_specified_span_no_active_span_raises_warning(llmobs, mock_llmobs_logs): + llmobs.export_span() + mock_llmobs_logs.warning.assert_called_once_with("No span provided and no active LLMObs-generated span found.") -def test_export_span_active_span_not_llmobs_span_raises_warning(LLMObs, mock_logs): - with LLMObs._instance.tracer.trace("non_llmobs_span"): - LLMObs.export_span() - mock_logs.warning.assert_called_once_with("Span must be an LLMObs-generated span.") +def test_export_span_active_span_not_llmobs_span_raises_warning(llmobs, mock_llmobs_logs): + with llmobs._instance.tracer.trace("non_llmobs_span"): + llmobs.export_span() + mock_llmobs_logs.warning.assert_called_once_with("Span must be an LLMObs-generated span.") -def test_export_span_no_specified_span_returns_exported_active_span(LLMObs): - with LLMObs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: - span_context = LLMObs.export_span() +def test_export_span_no_specified_span_returns_exported_active_span(llmobs): + with llmobs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: + span_context = llmobs.export_span() assert span_context is not None assert span_context["span_id"] == str(span.span_id) assert span_context["trace_id"] == "{:x}".format(span.trace_id) -def test_submit_evaluation_llmobs_disabled_raises_warning(LLMObs, mock_logs): - LLMObs.disable() - LLMObs.submit_evaluation( +def test_submit_evaluation_llmobs_disabled_raises_warning(llmobs, mock_llmobs_logs): + llmobs.disable() + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="categorical", value="high" ) - mock_logs.warning.assert_called_once_with( + mock_llmobs_logs.warning.assert_called_once_with( "LLMObs.submit_evaluation() called when LLMObs is not enabled. Evaluation metric data will not be sent." ) -def test_submit_evaluation_no_api_key_raises_warning(AgentlessLLMObs, mock_logs): +def test_submit_evaluation_no_api_key_raises_warning(llmobs, mock_llmobs_logs): with override_global_config(dict(_dd_api_key="")): - AgentlessLLMObs.submit_evaluation( + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="categorical", value="high", ) - mock_logs.warning.assert_called_once_with( + mock_llmobs_logs.warning.assert_called_once_with( "DD_API_KEY is required for sending evaluation metrics. Evaluation metric data will not be sent. " "Ensure this configuration is set before running your application." ) -def test_submit_evaluation_ml_app_raises_warning(LLMObs, mock_logs): +def test_submit_evaluation_ml_app_raises_warning(llmobs, mock_llmobs_logs): with override_global_config(dict(_llmobs_ml_app="")): - LLMObs.submit_evaluation( + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="categorical", value="high", ) - mock_logs.warning.assert_called_once_with( + mock_llmobs_logs.warning.assert_called_once_with( "ML App name is required for sending evaluation metrics. Evaluation metric data will not be sent. " "Ensure this configuration is set before running your application." ) -def test_submit_evaluation_span_context_incorrect_type_raises_warning(LLMObs, mock_logs): - LLMObs.submit_evaluation(span_context="asd", label="toxicity", metric_type="categorical", value="high") - mock_logs.warning.assert_called_once_with( +def test_submit_evaluation_span_context_incorrect_type_raises_warning(llmobs, mock_llmobs_logs): + llmobs.submit_evaluation(span_context="asd", label="toxicity", metric_type="categorical", value="high") + mock_llmobs_logs.warning.assert_called_once_with( "span_context must be a dictionary containing both span_id and trace_id keys. " "LLMObs.export_span() can be used to generate this dictionary from a given span." ) -def test_submit_evaluation_empty_span_or_trace_id_raises_warning(LLMObs, mock_logs): - LLMObs.submit_evaluation( +def test_submit_evaluation_empty_span_or_trace_id_raises_warning(llmobs, mock_llmobs_logs): + llmobs.submit_evaluation( span_context={"trace_id": "456"}, label="toxicity", metric_type="categorical", value="high" ) - mock_logs.warning.assert_called_once_with( + mock_llmobs_logs.warning.assert_called_once_with( "span_id and trace_id must both be specified for the given evaluation metric to be submitted." ) - mock_logs.reset_mock() - LLMObs.submit_evaluation(span_context={"span_id": "456"}, label="toxicity", metric_type="categorical", value="high") - mock_logs.warning.assert_called_once_with( + mock_llmobs_logs.reset_mock() + llmobs.submit_evaluation(span_context={"span_id": "456"}, label="toxicity", metric_type="categorical", value="high") + mock_llmobs_logs.warning.assert_called_once_with( "span_id and trace_id must both be specified for the given evaluation metric to be submitted." ) -def test_submit_evaluation_invalid_timestamp_raises_warning(LLMObs, mock_logs): - LLMObs.submit_evaluation( +def test_submit_evaluation_invalid_timestamp_raises_warning(llmobs, mock_llmobs_logs): + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="", metric_type="categorical", @@ -1074,35 +898,35 @@ def test_submit_evaluation_invalid_timestamp_raises_warning(LLMObs, mock_logs): ml_app="dummy", timestamp_ms="invalid", ) - mock_logs.warning.assert_called_once_with( + mock_llmobs_logs.warning.assert_called_once_with( "timestamp_ms must be a non-negative integer. Evaluation metric data will not be sent" ) -def test_submit_evaluation_empty_label_raises_warning(LLMObs, mock_logs): - LLMObs.submit_evaluation( +def test_submit_evaluation_empty_label_raises_warning(llmobs, mock_llmobs_logs): + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="", metric_type="categorical", value="high" ) - mock_logs.warning.assert_called_once_with("label must be the specified name of the evaluation metric.") + mock_llmobs_logs.warning.assert_called_once_with("label must be the specified name of the evaluation metric.") -def test_submit_evaluation_incorrect_metric_type_raises_warning(LLMObs, mock_logs): - LLMObs.submit_evaluation( +def test_submit_evaluation_incorrect_metric_type_raises_warning(llmobs, mock_llmobs_logs): + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="wrong", value="high" ) - mock_logs.warning.assert_called_once_with("metric_type must be one of 'categorical' or 'score'.") - mock_logs.reset_mock() - LLMObs.submit_evaluation( + mock_llmobs_logs.warning.assert_called_once_with("metric_type must be one of 'categorical' or 'score'.") + mock_llmobs_logs.reset_mock() + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="", value="high" ) - mock_logs.warning.assert_called_once_with("metric_type must be one of 'categorical' or 'score'.") + mock_llmobs_logs.warning.assert_called_once_with("metric_type must be one of 'categorical' or 'score'.") -def test_submit_evaluation_numerical_value_raises_unsupported_warning(LLMObs, mock_logs): - LLMObs.submit_evaluation( +def test_submit_evaluation_numerical_value_raises_unsupported_warning(llmobs, mock_llmobs_logs): + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="token_count", metric_type="numerical", value="high" ) - mock_logs.warning.assert_has_calls( + mock_llmobs_logs.warning.assert_has_calls( [ mock.call( "The evaluation metric type 'numerical' is unsupported. Use 'score' instead. " @@ -1112,44 +936,44 @@ def test_submit_evaluation_numerical_value_raises_unsupported_warning(LLMObs, mo ) -def test_submit_evaluation_incorrect_numerical_value_type_raises_warning(LLMObs, mock_logs): - LLMObs.submit_evaluation( +def test_submit_evaluation_incorrect_numerical_value_type_raises_warning(llmobs, mock_llmobs_logs): + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="token_count", metric_type="numerical", value="high" ) - mock_logs.warning.assert_has_calls( + mock_llmobs_logs.warning.assert_has_calls( [ mock.call("value must be an integer or float for a score metric."), ] ) -def test_submit_evaluation_incorrect_score_value_type_raises_warning(LLMObs, mock_logs): - LLMObs.submit_evaluation( +def test_submit_evaluation_incorrect_score_value_type_raises_warning(llmobs, mock_llmobs_logs): + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="token_count", metric_type="score", value="high" ) - mock_logs.warning.assert_called_once_with("value must be an integer or float for a score metric.") + mock_llmobs_logs.warning.assert_called_once_with("value must be an integer or float for a score metric.") -def test_submit_evaluation_invalid_tags_raises_warning(LLMObs, mock_logs): - LLMObs.submit_evaluation( +def test_submit_evaluation_invalid_tags_raises_warning(llmobs, mock_llmobs_logs): + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="categorical", value="high", tags=["invalid"], ) - mock_logs.warning.assert_called_once_with("tags must be a dictionary of string key-value pairs.") + mock_llmobs_logs.warning.assert_called_once_with("tags must be a dictionary of string key-value pairs.") -def test_submit_evaluation_invalid_metadata_raises_warning(LLMObs, mock_logs): - LLMObs.submit_evaluation( +def test_submit_evaluation_invalid_metadata_raises_warning(llmobs, mock_llmobs_logs): + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="categorical", value="high", metadata=1, ) - mock_logs.warning.assert_called_once_with("metadata must be json serializable dictionary.") + mock_llmobs_logs.warning.assert_called_once_with("metadata must be json serializable dictionary.") @pytest.mark.parametrize( @@ -1157,9 +981,9 @@ def test_submit_evaluation_invalid_metadata_raises_warning(LLMObs, mock_logs): [dict(_llmobs_ml_app="test_app_name")], ) def test_submit_evaluation_non_string_tags_raises_warning_but_still_submits( - LLMObs, mock_logs, mock_llmobs_eval_metric_writer + llmobs, mock_llmobs_logs, mock_llmobs_eval_metric_writer ): - LLMObs.submit_evaluation( + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="categorical", @@ -1167,8 +991,10 @@ def test_submit_evaluation_non_string_tags_raises_warning_but_still_submits( tags={1: 2, "foo": "bar"}, ml_app="dummy", ) - mock_logs.warning.assert_called_once_with("Failed to parse tags. Tags for evaluation metrics must be strings.") - mock_logs.reset_mock() + mock_llmobs_logs.warning.assert_called_once_with( + "Failed to parse tags. Tags for evaluation metrics must be strings." + ) + mock_llmobs_logs.reset_mock() mock_llmobs_eval_metric_writer.enqueue.assert_called_with( _expected_llmobs_eval_metric_event( ml_app="dummy", @@ -1186,8 +1012,8 @@ def test_submit_evaluation_non_string_tags_raises_warning_but_still_submits( "ddtrace_global_config", [dict(ddtrace="1.2.3", env="test_env", service="test_service", _llmobs_ml_app="test_app_name")], ) -def test_submit_evaluation_metric_tags(LLMObs, mock_llmobs_eval_metric_writer): - LLMObs.submit_evaluation( +def test_submit_evaluation_metric_tags(llmobs, mock_llmobs_eval_metric_writer): + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="categorical", @@ -1212,8 +1038,8 @@ def test_submit_evaluation_metric_tags(LLMObs, mock_llmobs_eval_metric_writer): "ddtrace_global_config", [dict(ddtrace="1.2.3", env="test_env", service="test_service", _llmobs_ml_app="test_app_name")], ) -def test_submit_evaluation_metric_with_metadata_enqueues_metric(LLMObs, mock_llmobs_eval_metric_writer): - LLMObs.submit_evaluation( +def test_submit_evaluation_metric_with_metadata_enqueues_metric(llmobs, mock_llmobs_eval_metric_writer): + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="categorical", @@ -1235,7 +1061,7 @@ def test_submit_evaluation_metric_with_metadata_enqueues_metric(LLMObs, mock_llm ) ) mock_llmobs_eval_metric_writer.reset() - LLMObs.submit_evaluation( + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="categorical", @@ -1257,8 +1083,8 @@ def test_submit_evaluation_metric_with_metadata_enqueues_metric(LLMObs, mock_llm ) -def test_submit_evaluation_enqueues_writer_with_categorical_metric(LLMObs, mock_llmobs_eval_metric_writer): - LLMObs.submit_evaluation( +def test_submit_evaluation_enqueues_writer_with_categorical_metric(llmobs, mock_llmobs_eval_metric_writer): + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="categorical", @@ -1276,9 +1102,9 @@ def test_submit_evaluation_enqueues_writer_with_categorical_metric(LLMObs, mock_ ) ) mock_llmobs_eval_metric_writer.reset_mock() - with LLMObs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: - LLMObs.submit_evaluation( - span_context=LLMObs.export_span(span), + with llmobs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: + llmobs.submit_evaluation( + span_context=llmobs.export_span(span), label="toxicity", metric_type="categorical", value="high", @@ -1296,8 +1122,8 @@ def test_submit_evaluation_enqueues_writer_with_categorical_metric(LLMObs, mock_ ) -def test_submit_evaluation_enqueues_writer_with_score_metric(LLMObs, mock_llmobs_eval_metric_writer): - LLMObs.submit_evaluation( +def test_submit_evaluation_enqueues_writer_with_score_metric(llmobs, mock_llmobs_eval_metric_writer): + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="sentiment", metric_type="score", @@ -1310,9 +1136,9 @@ def test_submit_evaluation_enqueues_writer_with_score_metric(LLMObs, mock_llmobs ) ) mock_llmobs_eval_metric_writer.reset_mock() - with LLMObs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: - LLMObs.submit_evaluation( - span_context=LLMObs.export_span(span), label="sentiment", metric_type="score", value=0.9, ml_app="dummy" + with llmobs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: + llmobs.submit_evaluation( + span_context=llmobs.export_span(span), label="sentiment", metric_type="score", value=0.9, ml_app="dummy" ) mock_llmobs_eval_metric_writer.enqueue.assert_called_with( _expected_llmobs_eval_metric_event( @@ -1327,9 +1153,9 @@ def test_submit_evaluation_enqueues_writer_with_score_metric(LLMObs, mock_llmobs def test_submit_evaluation_with_numerical_metric_enqueues_writer_with_score_metric( - LLMObs, mock_llmobs_eval_metric_writer + llmobs, mock_llmobs_eval_metric_writer ): - LLMObs.submit_evaluation( + llmobs.submit_evaluation( span_context={"span_id": "123", "trace_id": "456"}, label="token_count", metric_type="numerical", @@ -1342,9 +1168,9 @@ def test_submit_evaluation_with_numerical_metric_enqueues_writer_with_score_metr ) ) mock_llmobs_eval_metric_writer.reset_mock() - with LLMObs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: - LLMObs.submit_evaluation( - span_context=LLMObs.export_span(span), + with llmobs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span: + llmobs.submit_evaluation( + span_context=llmobs.export_span(span), label="token_count", metric_type="numerical", value=35, @@ -1363,143 +1189,143 @@ def test_submit_evaluation_with_numerical_metric_enqueues_writer_with_score_metr def test_flush_calls_periodic_agentless( - AgentlessLLMObs, mock_llmobs_span_agentless_writer, mock_llmobs_eval_metric_writer, mock_llmobs_evaluator_runner + AgentlessLLMObs, mock_llmobs_span_writer, mock_llmobs_eval_metric_writer, mock_llmobs_evaluator_runner ): AgentlessLLMObs.flush() - mock_llmobs_span_agentless_writer.periodic.assert_called_once() + mock_llmobs_span_writer.periodic.assert_called_once() mock_llmobs_eval_metric_writer.periodic.assert_called_once() mock_llmobs_evaluator_runner.periodic.assert_called_once() def test_flush_does_not_call_periodic_when_llmobs_is_disabled( - LLMObs, + llmobs, mock_llmobs_span_writer, mock_llmobs_eval_metric_writer, mock_llmobs_evaluator_runner, - mock_logs, + mock_llmobs_logs, disabled_llmobs, ): - LLMObs.flush() + llmobs.flush() mock_llmobs_span_writer.periodic.assert_not_called() mock_llmobs_eval_metric_writer.periodic.assert_not_called() mock_llmobs_evaluator_runner.periodic.assert_not_called() - mock_logs.warning.assert_has_calls( + mock_llmobs_logs.warning.assert_has_calls( [mock.call("flushing when LLMObs is disabled. No spans or evaluation metrics will be sent.")] ) def test_flush_does_not_call_periodic_when_llmobs_is_disabled_agentless( AgentlessLLMObs, - mock_llmobs_span_agentless_writer, + mock_llmobs_span_writer, mock_llmobs_eval_metric_writer, mock_llmobs_evaluator_runner, - mock_logs, + mock_llmobs_logs, disabled_llmobs, ): AgentlessLLMObs.flush() - mock_llmobs_span_agentless_writer.periodic.assert_not_called() + mock_llmobs_span_writer.periodic.assert_not_called() mock_llmobs_eval_metric_writer.periodic.assert_not_called() mock_llmobs_evaluator_runner.periodic.assert_not_called() - mock_logs.warning.assert_has_calls( + mock_llmobs_logs.warning.assert_has_calls( [mock.call("flushing when LLMObs is disabled. No spans or evaluation metrics will be sent.")] ) -def test_inject_distributed_headers_llmobs_disabled_does_nothing(LLMObs, mock_logs): - LLMObs.disable() - headers = LLMObs.inject_distributed_headers({}, span=None) - mock_logs.warning.assert_called_once_with( +def test_inject_distributed_headers_llmobs_disabled_does_nothing(llmobs, mock_llmobs_logs): + llmobs.disable() + headers = llmobs.inject_distributed_headers({}, span=None) + mock_llmobs_logs.warning.assert_called_once_with( "LLMObs.inject_distributed_headers() called when LLMObs is not enabled. " "Distributed context will not be injected." ) assert headers == {} -def test_inject_distributed_headers_not_dict_logs_warning(LLMObs, mock_logs): - headers = LLMObs.inject_distributed_headers("not a dictionary", span=None) - mock_logs.warning.assert_called_once_with("request_headers must be a dictionary of string key-value pairs.") +def test_inject_distributed_headers_not_dict_logs_warning(llmobs, mock_llmobs_logs): + headers = llmobs.inject_distributed_headers("not a dictionary", span=None) + mock_llmobs_logs.warning.assert_called_once_with("request_headers must be a dictionary of string key-value pairs.") assert headers == "not a dictionary" - mock_logs.reset_mock() - headers = LLMObs.inject_distributed_headers(123, span=None) - mock_logs.warning.assert_called_once_with("request_headers must be a dictionary of string key-value pairs.") + mock_llmobs_logs.reset_mock() + headers = llmobs.inject_distributed_headers(123, span=None) + mock_llmobs_logs.warning.assert_called_once_with("request_headers must be a dictionary of string key-value pairs.") assert headers == 123 - mock_logs.reset_mock() - headers = LLMObs.inject_distributed_headers(None, span=None) - mock_logs.warning.assert_called_once_with("request_headers must be a dictionary of string key-value pairs.") + mock_llmobs_logs.reset_mock() + headers = llmobs.inject_distributed_headers(None, span=None) + mock_llmobs_logs.warning.assert_called_once_with("request_headers must be a dictionary of string key-value pairs.") assert headers is None -def test_inject_distributed_headers_no_active_span_logs_warning(LLMObs, mock_logs): - headers = LLMObs.inject_distributed_headers({}, span=None) - mock_logs.warning.assert_called_once_with("No span provided and no currently active span found.") +def test_inject_distributed_headers_no_active_span_logs_warning(llmobs, mock_llmobs_logs): + headers = llmobs.inject_distributed_headers({}, span=None) + mock_llmobs_logs.warning.assert_called_once_with("No span provided and no currently active span found.") assert headers == {} -def test_inject_distributed_headers_span_calls_httppropagator_inject(LLMObs, mock_logs): - span = LLMObs._instance.tracer.trace("test_span") +def test_inject_distributed_headers_span_calls_httppropagator_inject(llmobs, mock_llmobs_logs): + span = llmobs._instance.tracer.trace("test_span") with mock.patch("ddtrace.propagation.http.HTTPPropagator.inject") as mock_inject: - LLMObs.inject_distributed_headers({}, span=span) + llmobs.inject_distributed_headers({}, span=span) assert mock_inject.call_count == 1 mock_inject.assert_called_once_with(span.context, {}) -def test_inject_distributed_headers_current_active_span_injected(LLMObs, mock_logs): - span = LLMObs._instance.tracer.trace("test_span") +def test_inject_distributed_headers_current_active_span_injected(llmobs, mock_llmobs_logs): + span = llmobs._instance.tracer.trace("test_span") with mock.patch("ddtrace.llmobs._llmobs.HTTPPropagator.inject") as mock_inject: - LLMObs.inject_distributed_headers({}, span=None) + llmobs.inject_distributed_headers({}, span=None) assert mock_inject.call_count == 1 mock_inject.assert_called_once_with(span.context, {}) -def test_activate_distributed_headers_llmobs_disabled_does_nothing(LLMObs, mock_logs): - LLMObs.disable() - LLMObs.activate_distributed_headers({}) - mock_logs.warning.assert_called_once_with( +def test_activate_distributed_headers_llmobs_disabled_does_nothing(llmobs, mock_llmobs_logs): + llmobs.disable() + llmobs.activate_distributed_headers({}) + mock_llmobs_logs.warning.assert_called_once_with( "LLMObs.activate_distributed_headers() called when LLMObs is not enabled. " "Distributed context will not be activated." ) -def test_activate_distributed_headers_calls_httppropagator_extract(LLMObs, mock_logs): +def test_activate_distributed_headers_calls_httppropagator_extract(llmobs, mock_llmobs_logs): with mock.patch("ddtrace.llmobs._llmobs.HTTPPropagator.extract") as mock_extract: - LLMObs.activate_distributed_headers({}) + llmobs.activate_distributed_headers({}) assert mock_extract.call_count == 1 mock_extract.assert_called_once_with({}) -def test_activate_distributed_headers_no_trace_id_does_nothing(LLMObs, mock_logs): +def test_activate_distributed_headers_no_trace_id_does_nothing(llmobs, mock_llmobs_logs): with mock.patch("ddtrace.llmobs._llmobs.HTTPPropagator.extract") as mock_extract: mock_extract.return_value = Context(span_id="123", meta={PROPAGATED_PARENT_ID_KEY: "123"}) - LLMObs.activate_distributed_headers({}) + llmobs.activate_distributed_headers({}) assert mock_extract.call_count == 1 - mock_logs.warning.assert_called_once_with("Failed to extract trace ID or span ID from request headers.") + mock_llmobs_logs.warning.assert_called_once_with("Failed to extract trace ID or span ID from request headers.") -def test_activate_distributed_headers_no_span_id_does_nothing(LLMObs, mock_logs): +def test_activate_distributed_headers_no_span_id_does_nothing(llmobs, mock_llmobs_logs): with mock.patch("ddtrace.llmobs._llmobs.HTTPPropagator.extract") as mock_extract: mock_extract.return_value = Context(trace_id="123", meta={PROPAGATED_PARENT_ID_KEY: "123"}) - LLMObs.activate_distributed_headers({}) + llmobs.activate_distributed_headers({}) assert mock_extract.call_count == 1 - mock_logs.warning.assert_called_once_with("Failed to extract trace ID or span ID from request headers.") + mock_llmobs_logs.warning.assert_called_once_with("Failed to extract trace ID or span ID from request headers.") -def test_activate_distributed_headers_no_llmobs_parent_id_does_nothing(LLMObs, mock_logs): +def test_activate_distributed_headers_no_llmobs_parent_id_does_nothing(llmobs, mock_llmobs_logs): with mock.patch("ddtrace.llmobs._llmobs.HTTPPropagator.extract") as mock_extract: dummy_context = Context(trace_id="123", span_id="456") mock_extract.return_value = dummy_context with mock.patch("ddtrace.llmobs.LLMObs._instance.tracer.context_provider.activate") as mock_activate: - LLMObs.activate_distributed_headers({}) + llmobs.activate_distributed_headers({}) assert mock_extract.call_count == 1 - mock_logs.warning.assert_called_once_with("Failed to extract LLMObs parent ID from request headers.") + mock_llmobs_logs.warning.assert_called_once_with("Failed to extract LLMObs parent ID from request headers.") mock_activate.assert_called_once_with(dummy_context) -def test_activate_distributed_headers_activates_context(LLMObs, mock_logs): +def test_activate_distributed_headers_activates_context(llmobs, mock_llmobs_logs): with mock.patch("ddtrace.llmobs._llmobs.HTTPPropagator.extract") as mock_extract: dummy_context = Context(trace_id="123", span_id="456", meta={PROPAGATED_PARENT_ID_KEY: "789"}) mock_extract.return_value = dummy_context with mock.patch("ddtrace.llmobs.LLMObs._instance.tracer.context_provider.activate") as mock_activate: - LLMObs.activate_distributed_headers({}) + llmobs.activate_distributed_headers({}) assert mock_extract.call_count == 1 mock_activate.assert_called_once_with(dummy_context) @@ -1514,16 +1340,10 @@ def test_llmobs_fork_recreates_and_restarts_span_writer(): if pid: # parent assert llmobs_service._instance.tracer._pid == original_pid assert llmobs_service._instance._llmobs_span_writer == original_span_writer - assert ( - llmobs_service._instance._trace_processor._span_writer == llmobs_service._instance._llmobs_span_writer - ) assert llmobs_service._instance._llmobs_span_writer.status == ServiceStatus.RUNNING else: # child assert llmobs_service._instance.tracer._pid != original_pid assert llmobs_service._instance._llmobs_span_writer != original_span_writer - assert ( - llmobs_service._instance._trace_processor._span_writer == llmobs_service._instance._llmobs_span_writer - ) assert llmobs_service._instance._llmobs_span_writer.status == ServiceStatus.RUNNING llmobs_service.disable() os._exit(12) @@ -1569,18 +1389,10 @@ def test_llmobs_fork_recreates_and_restarts_evaluator_runner(mock_ragas_evaluato if pid: # parent assert llmobs_service._instance.tracer._pid == original_pid assert llmobs_service._instance._evaluator_runner == original_evaluator_runner - assert ( - llmobs_service._instance._trace_processor._evaluator_runner - == llmobs_service._instance._evaluator_runner - ) assert llmobs_service._instance._evaluator_runner.status == ServiceStatus.RUNNING else: # child assert llmobs_service._instance.tracer._pid != original_pid assert llmobs_service._instance._evaluator_runner != original_evaluator_runner - assert ( - llmobs_service._instance._trace_processor._evaluator_runner - == llmobs_service._instance._evaluator_runner - ) assert llmobs_service._instance._evaluator_runner.status == ServiceStatus.RUNNING llmobs_service.disable() os._exit(12) @@ -1667,42 +1479,6 @@ def test_llmobs_fork_evaluator_runner_run(monkeypatch): llmobs_service.disable() -def test_llmobs_fork_custom_filter(monkeypatch): - """Test that forking a process correctly keeps any custom filters.""" - - class CustomFilter(TraceFilter): - def process_trace(self, trace): - return trace - - monkeypatch.setenv("_DD_LLMOBS_WRITER_INTERVAL", 5.0) - with mock.patch("ddtrace.internal.writer.HTTPWriter._send_payload"): - tracer = DummyTracer() - custom_filter = CustomFilter() - tracer.configure(settings={"FILTERS": [custom_filter]}) - llmobs_service.enable(_tracer=tracer, ml_app="test_app") - assert custom_filter in llmobs_service._instance.tracer._filters - pid = os.fork() - if pid: # parent - assert custom_filter in llmobs_service._instance.tracer._filters - assert any( - isinstance(tracer_filter, LLMObsTraceProcessor) - for tracer_filter in llmobs_service._instance.tracer._filters - ) - else: # child - assert custom_filter in llmobs_service._instance.tracer._filters - assert any( - isinstance(tracer_filter, LLMObsTraceProcessor) - for tracer_filter in llmobs_service._instance.tracer._filters - ) - llmobs_service.disable() - os._exit(12) - - _, status = os.waitpid(pid, 0) - exit_code = os.WEXITSTATUS(status) - assert exit_code == 12 - llmobs_service.disable() - - def test_llmobs_fork_disabled(monkeypatch): """Test that after being disabled the service remains disabled when forking""" monkeypatch.setenv("DD_LLMOBS_ENABLED", "0") @@ -1746,46 +1522,46 @@ def test_llmobs_fork_disabled_then_enabled(monkeypatch): svc.disable() -def test_llmobs_with_evaluator_runner(LLMObs, mock_llmobs_evaluator_runner): - with LLMObs.llm(model_name="test_model"): +def test_llmobs_with_evaluator_runner(llmobs, mock_llmobs_evaluator_runner): + with llmobs.llm(model_name="test_model"): pass time.sleep(0.1) - assert LLMObs._instance._evaluator_runner.enqueue.call_count == 1 + assert llmobs._instance._evaluator_runner.enqueue.call_count == 1 -def test_llmobs_with_evaluator_runner_does_not_enqueue_evaluation_spans(mock_llmobs_evaluator_runner, LLMObs): - with LLMObs.llm(model_name="test_model", ml_app="{}-dummy".format(RAGAS_ML_APP_PREFIX)): +def test_llmobs_with_evaluator_runner_does_not_enqueue_evaluation_spans(mock_llmobs_evaluator_runner, llmobs): + with llmobs.llm(model_name="test_model", ml_app="{}-dummy".format(RAGAS_ML_APP_PREFIX)): pass time.sleep(0.1) - assert LLMObs._instance._evaluator_runner.enqueue.call_count == 0 + assert llmobs._instance._evaluator_runner.enqueue.call_count == 0 -def test_llmobs_with_evaluation_runner_does_not_enqueue_non_llm_spans(mock_llmobs_evaluator_runner, LLMObs): - with LLMObs.workflow(name="test"): +def test_llmobs_with_evaluation_runner_does_not_enqueue_non_llm_spans(mock_llmobs_evaluator_runner, llmobs): + with llmobs.workflow(name="test"): pass - with LLMObs.agent(name="test"): + with llmobs.agent(name="test"): pass - with LLMObs.task(name="test"): + with llmobs.task(name="test"): pass - with LLMObs.embedding(model_name="test"): + with llmobs.embedding(model_name="test"): pass - with LLMObs.retrieval(name="test"): + with llmobs.retrieval(name="test"): pass - with LLMObs.tool(name="test"): + with llmobs.tool(name="test"): pass time.sleep(0.1) - assert LLMObs._instance._evaluator_runner.enqueue.call_count == 0 + assert llmobs._instance._evaluator_runner.enqueue.call_count == 0 -def test_annotation_context_modifies_span_tags(LLMObs): - with LLMObs.annotation_context(tags={"foo": "bar"}): - with LLMObs.agent(name="test_agent") as span: +def test_annotation_context_modifies_span_tags(llmobs): + with llmobs.annotation_context(tags={"foo": "bar"}): + with llmobs.agent(name="test_agent") as span: assert span._get_ctx_item(TAGS) == {"foo": "bar"} -def test_annotation_context_modifies_prompt(LLMObs): - with LLMObs.annotation_context(prompt={"template": "test_template"}): - with LLMObs.llm(name="test_agent", model_name="test") as span: +def test_annotation_context_modifies_prompt(llmobs): + with llmobs.annotation_context(prompt={"template": "test_template"}): + with llmobs.llm(name="test_agent", model_name="test") as span: assert span._get_ctx_item(INPUT_PROMPT) == { "template": "test_template", "_dd_context_variable_keys": ["context"], @@ -1793,80 +1569,80 @@ def test_annotation_context_modifies_prompt(LLMObs): } -def test_annotation_context_modifies_name(LLMObs): - with LLMObs.annotation_context(name="test_agent_override"): - with LLMObs.llm(name="test_agent", model_name="test") as span: +def test_annotation_context_modifies_name(llmobs): + with llmobs.annotation_context(name="test_agent_override"): + with llmobs.llm(name="test_agent", model_name="test") as span: assert span.name == "test_agent_override" -def test_annotation_context_finished_context_does_not_modify_tags(LLMObs): - with LLMObs.annotation_context(tags={"foo": "bar"}): +def test_annotation_context_finished_context_does_not_modify_tags(llmobs): + with llmobs.annotation_context(tags={"foo": "bar"}): pass - with LLMObs.agent(name="test_agent") as span: + with llmobs.agent(name="test_agent") as span: assert span._get_ctx_item(TAGS) is None -def test_annotation_context_finished_context_does_not_modify_prompt(LLMObs): - with LLMObs.annotation_context(prompt={"template": "test_template"}): +def test_annotation_context_finished_context_does_not_modify_prompt(llmobs): + with llmobs.annotation_context(prompt={"template": "test_template"}): pass - with LLMObs.llm(name="test_agent", model_name="test") as span: + with llmobs.llm(name="test_agent", model_name="test") as span: assert span._get_ctx_item(INPUT_PROMPT) is None -def test_annotation_context_finished_context_does_not_modify_name(LLMObs): - with LLMObs.annotation_context(name="test_agent_override"): +def test_annotation_context_finished_context_does_not_modify_name(llmobs): + with llmobs.annotation_context(name="test_agent_override"): pass - with LLMObs.agent(name="test_agent") as span: + with llmobs.agent(name="test_agent") as span: assert span.name == "test_agent" -def test_annotation_context_nested(LLMObs): - with LLMObs.annotation_context(tags={"foo": "bar", "boo": "bar"}): - with LLMObs.annotation_context(tags={"foo": "baz"}): - with LLMObs.agent(name="test_agent") as span: +def test_annotation_context_nested(llmobs): + with llmobs.annotation_context(tags={"foo": "bar", "boo": "bar"}): + with llmobs.annotation_context(tags={"foo": "baz"}): + with llmobs.agent(name="test_agent") as span: assert span._get_ctx_item(TAGS) == {"foo": "baz", "boo": "bar"} -def test_annotation_context_nested_overrides_name(LLMObs): - with LLMObs.annotation_context(name="unexpected"): - with LLMObs.annotation_context(name="expected"): - with LLMObs.agent(name="test_agent") as span: +def test_annotation_context_nested_overrides_name(llmobs): + with llmobs.annotation_context(name="unexpected"): + with llmobs.annotation_context(name="expected"): + with llmobs.agent(name="test_agent") as span: assert span.name == "expected" -def test_annotation_context_nested_maintains_trace_structure(LLMObs, mock_llmobs_span_writer): +def test_annotation_context_nested_maintains_trace_structure(llmobs, llmobs_events): """This test makes sure starting/stopping annotation contexts do not modify the llmobs trace structure""" - with LLMObs.annotation_context(tags={"foo": "bar", "boo": "bar"}): - with LLMObs.agent(name="parent_span") as parent_span: - with LLMObs.annotation_context(tags={"foo": "baz"}): - with LLMObs.workflow(name="child_span") as child_span: + with llmobs.annotation_context(tags={"foo": "bar", "boo": "bar"}): + with llmobs.agent(name="parent_span") as parent_span: + with llmobs.annotation_context(tags={"foo": "baz"}): + with llmobs.workflow(name="child_span") as child_span: assert child_span._get_ctx_item(TAGS) == {"foo": "baz", "boo": "bar"} assert parent_span._get_ctx_item(TAGS) == {"foo": "bar", "boo": "bar"} - assert len(mock_llmobs_span_writer.enqueue.call_args_list) == 2 - parent_span, child_span = [span[0] for span, _ in mock_llmobs_span_writer.enqueue.call_args_list] + assert len(llmobs_events) == 2 + parent_span, child_span = llmobs_events[1], llmobs_events[0] assert child_span["trace_id"] == parent_span["trace_id"] assert child_span["span_id"] != parent_span["span_id"] assert child_span["parent_id"] == parent_span["span_id"] assert parent_span["parent_id"] == "undefined" - mock_llmobs_span_writer.reset_mock() - with LLMObs.annotation_context(tags={"foo": "bar", "boo": "bar"}): - with LLMObs.agent(name="parent_span"): +def test_annotation_context_separate_traces_maintained(llmobs, llmobs_events): + with llmobs.annotation_context(tags={"foo": "bar", "boo": "bar"}): + with llmobs.agent(name="parent_span"): pass - with LLMObs.workflow(name="child_span"): + with llmobs.workflow(name="child_span"): pass - assert len(mock_llmobs_span_writer.enqueue.call_args_list) == 2 - trace_one, trace_two = [span[0] for span, _ in mock_llmobs_span_writer.enqueue.call_args_list] - assert trace_one["trace_id"] != trace_two["trace_id"] - assert trace_one["span_id"] != trace_two["span_id"] - assert trace_two["parent_id"] == "undefined" - assert trace_one["parent_id"] == "undefined" + assert len(llmobs_events) == 2 + agent_span, workflow_span = llmobs_events[1], llmobs_events[0] + assert agent_span["trace_id"] != workflow_span["trace_id"] + assert agent_span["span_id"] != workflow_span["span_id"] + assert workflow_span["parent_id"] == "undefined" + assert agent_span["parent_id"] == "undefined" -def test_annotation_context_only_applies_to_local_context(LLMObs): +def test_annotation_context_only_applies_to_local_context(llmobs): """ tests that annotation contexts only apply to spans belonging to the same trace context and not globally to all spans. @@ -1882,8 +1658,8 @@ def test_annotation_context_only_applies_to_local_context(LLMObs): def context_one(): nonlocal agent_has_correct_name nonlocal agent_has_correct_tags - with LLMObs.annotation_context(name="expected_agent", tags={"foo": "bar"}): - with LLMObs.agent(name="test_agent") as span: + with llmobs.annotation_context(name="expected_agent", tags={"foo": "bar"}): + with llmobs.agent(name="test_agent") as span: event.wait() agent_has_correct_tags = span._get_ctx_item(TAGS) == {"foo": "bar"} agent_has_correct_name = span.name == "expected_agent" @@ -1892,9 +1668,9 @@ def context_one(): def context_two(): nonlocal tool_has_correct_name nonlocal tool_does_not_have_tags - with LLMObs.agent(name="test_agent"): - with LLMObs.annotation_context(name="expected_tool"): - with LLMObs.tool(name="test_tool") as tool_span: + with llmobs.agent(name="test_agent"): + with llmobs.annotation_context(name="expected_tool"): + with llmobs.tool(name="test_tool") as tool_span: event.wait() tool_does_not_have_tags = tool_span._get_ctx_item(TAGS) is None tool_has_correct_name = tool_span.name == "expected_tool" @@ -1904,7 +1680,7 @@ def context_two(): thread_one.start() thread_two.start() - with LLMObs.agent(name="test_agent") as span: + with llmobs.agent(name="test_agent") as span: assert span.name == "test_agent" assert span._get_ctx_item(TAGS) is None @@ -1920,15 +1696,15 @@ def context_two(): assert tool_does_not_have_tags is True -async def test_annotation_context_async_modifies_span_tags(LLMObs): - async with LLMObs.annotation_context(tags={"foo": "bar"}): - with LLMObs.agent(name="test_agent") as span: +async def test_annotation_context_async_modifies_span_tags(llmobs): + async with llmobs.annotation_context(tags={"foo": "bar"}): + with llmobs.agent(name="test_agent") as span: assert span._get_ctx_item(TAGS) == {"foo": "bar"} -async def test_annotation_context_async_modifies_prompt(LLMObs): - async with LLMObs.annotation_context(prompt={"template": "test_template"}): - with LLMObs.llm(name="test_agent", model_name="test") as span: +async def test_annotation_context_async_modifies_prompt(llmobs): + async with llmobs.annotation_context(prompt={"template": "test_template"}): + with llmobs.llm(name="test_agent", model_name="test") as span: assert span._get_ctx_item(INPUT_PROMPT) == { "template": "test_template", "_dd_context_variable_keys": ["context"], @@ -1936,41 +1712,42 @@ async def test_annotation_context_async_modifies_prompt(LLMObs): } -async def test_annotation_context_async_modifies_name(LLMObs): - async with LLMObs.annotation_context(name="test_agent_override"): - with LLMObs.llm(name="test_agent", model_name="test") as span: +async def test_annotation_context_async_modifies_name(llmobs): + async with llmobs.annotation_context(name="test_agent_override"): + with llmobs.llm(name="test_agent", model_name="test") as span: assert span.name == "test_agent_override" -async def test_annotation_context_async_finished_context_does_not_modify_tags(LLMObs): - async with LLMObs.annotation_context(tags={"foo": "bar"}): +async def test_annotation_context_async_finished_context_does_not_modify_tags(llmobs): + async with llmobs.annotation_context(tags={"foo": "bar"}): pass - with LLMObs.agent(name="test_agent") as span: + with llmobs.agent(name="test_agent") as span: assert span._get_ctx_item(TAGS) is None -async def test_annotation_context_async_finished_context_does_not_modify_prompt(LLMObs): - async with LLMObs.annotation_context(prompt={"template": "test_template"}): +async def test_annotation_context_async_finished_context_does_not_modify_prompt(llmobs): + async with llmobs.annotation_context(prompt={"template": "test_template"}): pass - with LLMObs.llm(name="test_agent", model_name="test") as span: + with llmobs.llm(name="test_agent", model_name="test") as span: assert span._get_ctx_item(INPUT_PROMPT) is None -async def test_annotation_context_finished_context_async_does_not_modify_name(LLMObs): - async with LLMObs.annotation_context(name="test_agent_override"): +async def test_annotation_context_finished_context_async_does_not_modify_name(llmobs): + async with llmobs.annotation_context(name="test_agent_override"): pass - with LLMObs.agent(name="test_agent") as span: + with llmobs.agent(name="test_agent") as span: assert span.name == "test_agent" -async def test_annotation_context_async_nested(LLMObs): - async with LLMObs.annotation_context(tags={"foo": "bar", "boo": "bar"}): - async with LLMObs.annotation_context(tags={"foo": "baz"}): - with LLMObs.agent(name="test_agent") as span: +async def test_annotation_context_async_nested(llmobs): + async with llmobs.annotation_context(tags={"foo": "bar", "boo": "bar"}): + async with llmobs.annotation_context(tags={"foo": "baz"}): + with llmobs.agent(name="test_agent") as span: assert span._get_ctx_item(TAGS) == {"foo": "baz", "boo": "bar"} def test_service_enable_starts_evaluator_runner_when_evaluators_exist(): + pytest.importorskip("ragas") with override_global_config(dict(_dd_api_key="", _llmobs_ml_app="")): with override_env(dict(_DD_LLMOBS_EVALUATORS="ragas_faithfulness")): dummy_tracer = DummyTracer() diff --git a/tests/llmobs/test_llmobs_span_agent_writer.py b/tests/llmobs/test_llmobs_span_agent_writer.py index 76fe0f21aef..d16bb9f0e2c 100644 --- a/tests/llmobs/test_llmobs_span_agent_writer.py +++ b/tests/llmobs/test_llmobs_span_agent_writer.py @@ -44,7 +44,8 @@ def test_flush_queue_when_event_cause_queue_to_exceed_payload_limit( [ mock.call("flushing queue because queuing next event will exceed EVP payload limit"), mock.call("encode %d LLMObs span events to be sent", 5), - ] + ], + any_order=True, ) diff --git a/tests/llmobs/test_llmobs_span_agentless_writer.py b/tests/llmobs/test_llmobs_span_agentless_writer.py index 4882f3553d8..4a54faf130d 100644 --- a/tests/llmobs/test_llmobs_span_agentless_writer.py +++ b/tests/llmobs/test_llmobs_span_agentless_writer.py @@ -75,26 +75,25 @@ def test_truncating_oversized_events(mock_writer_logs, mock_http_writer_send_pay ) -def test_send_completion_event(mock_writer_logs, mock_http_writer_logs, mock_http_writer_send_payload_response): +def test_send_completion_event(mock_writer_logs, mock_http_writer_send_payload_response): with override_global_config(dict(_dd_site=DATADOG_SITE, _dd_api_key="foobar.baz")): llmobs_span_writer = LLMObsSpanWriter(is_agentless=True, interval=1, timeout=1) llmobs_span_writer.start() llmobs_span_writer.enqueue(_completion_event()) llmobs_span_writer.periodic() mock_writer_logs.debug.assert_has_calls([mock.call("encode %d LLMObs span events to be sent", 1)]) - mock_http_writer_logs.error.assert_not_called() -def test_send_chat_completion_event(mock_writer_logs, mock_http_writer_logs, mock_http_writer_send_payload_response): +def test_send_chat_completion_event(mock_writer_logs, mock_http_writer_send_payload_response): with override_global_config(dict(_dd_site=DATADOG_SITE, _dd_api_key="foobar.baz")): llmobs_span_writer = LLMObsSpanWriter(is_agentless=True, interval=1, timeout=1) llmobs_span_writer.start() llmobs_span_writer.enqueue(_chat_completion_event()) llmobs_span_writer.periodic() mock_writer_logs.debug.assert_has_calls([mock.call("encode %d LLMObs span events to be sent", 1)]) - mock_http_writer_logs.error.assert_not_called() +@mock.patch("ddtrace.internal.writer.writer.log") def test_send_completion_bad_api_key(mock_http_writer_logs, mock_http_writer_put_response_forbidden): with override_global_config(dict(_dd_site=DATADOG_SITE, _dd_api_key="")): llmobs_span_writer = LLMObsSpanWriter(is_agentless=True, interval=1, timeout=1) @@ -109,7 +108,7 @@ def test_send_completion_bad_api_key(mock_http_writer_logs, mock_http_writer_put ) -def test_send_timed_events(mock_writer_logs, mock_http_writer_logs, mock_http_writer_send_payload_response): +def test_send_timed_events(mock_writer_logs, mock_http_writer_send_payload_response): with override_global_config(dict(_dd_site=DATADOG_SITE, _dd_api_key="foobar.baz")): llmobs_span_writer = LLMObsSpanWriter(is_agentless=True, interval=0.01, timeout=1) llmobs_span_writer.start() @@ -122,10 +121,9 @@ def test_send_timed_events(mock_writer_logs, mock_http_writer_logs, mock_http_wr llmobs_span_writer.enqueue(_chat_completion_event()) time.sleep(0.1) mock_writer_logs.debug.assert_has_calls([mock.call("encode %d LLMObs span events to be sent", 1)]) - mock_http_writer_logs.error.assert_not_called() -def test_send_multiple_events(mock_writer_logs, mock_http_writer_logs, mock_http_writer_send_payload_response): +def test_send_multiple_events(mock_writer_logs, mock_http_writer_send_payload_response): with override_global_config(dict(_dd_site=DATADOG_SITE, _dd_api_key="foobar.baz")): llmobs_span_writer = LLMObsSpanWriter(is_agentless=True, interval=0.01, timeout=1) llmobs_span_writer.start() @@ -135,10 +133,9 @@ def test_send_multiple_events(mock_writer_logs, mock_http_writer_logs, mock_http llmobs_span_writer.enqueue(_chat_completion_event()) time.sleep(0.1) mock_writer_logs.debug.assert_has_calls([mock.call("encode %d LLMObs span events to be sent", 2)]) - mock_http_writer_logs.error.assert_not_called() -def test_send_on_exit(mock_writer_logs, run_python_code_in_subprocess): +def test_send_on_exit(run_python_code_in_subprocess): env = os.environ.copy() pypath = [os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))] if "PYTHONPATH" in env: diff --git a/tests/llmobs/test_llmobs_trace_processor.py b/tests/llmobs/test_llmobs_trace_processor.py deleted file mode 100644 index b55286d49c8..00000000000 --- a/tests/llmobs/test_llmobs_trace_processor.py +++ /dev/null @@ -1,36 +0,0 @@ -import mock - -from ddtrace._trace.span import Span -from ddtrace.ext import SpanTypes -from ddtrace.llmobs._constants import SPAN_KIND -from ddtrace.llmobs._trace_processor import LLMObsTraceProcessor -from tests.utils import override_global_config - - -def test_processor_returns_all_traces_by_default(): - """Test that the LLMObsTraceProcessor returns all traces by default.""" - trace_filter = LLMObsTraceProcessor(llmobs_span_writer=mock.MagicMock()) - root_llm_span = Span(name="span1", span_type=SpanTypes.LLM) - root_llm_span._set_ctx_item(SPAN_KIND, "llm") - trace1 = [root_llm_span] - assert trace_filter.process_trace(trace1) == trace1 - - -def test_processor_returns_all_traces_if_not_agentless(): - """Test that the LLMObsTraceProcessor returns all traces if DD_LLMOBS_AGENTLESS_ENABLED is not set to true.""" - with override_global_config(dict(_llmobs_agentless_enabled=False)): - trace_filter = LLMObsTraceProcessor(llmobs_span_writer=mock.MagicMock()) - root_llm_span = Span(name="span1", span_type=SpanTypes.LLM) - root_llm_span._set_ctx_item(SPAN_KIND, "llm") - trace1 = [root_llm_span] - assert trace_filter.process_trace(trace1) == trace1 - - -def test_processor_returns_none_in_agentless_mode(): - """Test that the LLMObsTraceProcessor returns None if DD_LLMOBS_AGENTLESS_ENABLED is set to true.""" - with override_global_config(dict(_llmobs_agentless_enabled=True)): - trace_filter = LLMObsTraceProcessor(llmobs_span_writer=mock.MagicMock()) - root_llm_span = Span(name="span1", span_type=SpanTypes.LLM) - root_llm_span._set_ctx_item(SPAN_KIND, "llm") - trace1 = [root_llm_span] - assert trace_filter.process_trace(trace1) is None diff --git a/tests/llmobs/test_propagation.py b/tests/llmobs/test_propagation.py index d892c6b98a2..d14b22d65d5 100644 --- a/tests/llmobs/test_propagation.py +++ b/tests/llmobs/test_propagation.py @@ -216,7 +216,6 @@ def test_activate_distributed_headers_propagate_correct_llmobs_parent_id_simple( env["DD_TRACE_ENABLED"] = "0" stdout, stderr, status, _ = run_python_code_in_subprocess(code=code, env=env) assert status == 0, (stdout, stderr) - assert stderr == b"", (stdout, stderr) headers = json.loads(stdout.decode()) LLMObs.activate_distributed_headers(headers) @@ -252,7 +251,6 @@ def test_activate_distributed_headers_propagate_llmobs_parent_id_complex(run_pyt env["DD_TRACE_ENABLED"] = "0" stdout, stderr, status, _ = run_python_code_in_subprocess(code=code, env=env) assert status == 0, (stdout, stderr) - assert stderr == b"", (stdout, stderr) headers = json.loads(stdout.decode()) LLMObs.activate_distributed_headers(headers)