diff --git a/.github/workflows/lib-langchain-tests.yml b/.github/workflows/lib-langchain-tests.yml
index e6525ddb7a..2fd4a9b91a 100644
--- a/.github/workflows/lib-langchain-tests.yml
+++ b/.github/workflows/lib-langchain-tests.yml
@@ -23,7 +23,7 @@ jobs:
     strategy:
       fail-fast: true
       matrix:
-        python_version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python_version: ["3.9", "3.10", "3.11", "3.12"]
 
     steps:
     - name: Check out code
diff --git a/sdks/python/src/opik/api_objects/opik_client.py b/sdks/python/src/opik/api_objects/opik_client.py
index df0876bccb..f61b351871 100644
--- a/sdks/python/src/opik/api_objects/opik_client.py
+++ b/sdks/python/src/opik/api_objects/opik_client.py
@@ -300,7 +300,9 @@ def span(
             output=output,
             metadata=metadata,
             tags=tags,
-            usage=parsed_usage.supported_usage,
+            usage=parsed_usage.full_usage
+            if provider == "google_vertexai"
+            else parsed_usage.supported_usage,
             model=model,
             provider=provider,
             error_info=error_info,
diff --git a/sdks/python/src/opik/integrations/langchain/google_run_helpers.py b/sdks/python/src/opik/integrations/langchain/google_run_helpers.py
new file mode 100644
index 0000000000..b0d54bf6d7
--- /dev/null
+++ b/sdks/python/src/opik/integrations/langchain/google_run_helpers.py
@@ -0,0 +1,80 @@
+import logging
+from typing import Any, Dict, Optional, TYPE_CHECKING, Tuple, cast
+
+from opik import logging_messages
+from opik.types import LLMUsageInfo, UsageDict
+from opik.validation import usage as usage_validator
+
+if TYPE_CHECKING:
+    from langchain_core.tracers.schemas import Run
+
+LOGGER = logging.getLogger(__name__)
+
+
+def get_llm_usage_info(run_dict: Optional[Dict[str, Any]] = None) -> LLMUsageInfo:
+    if run_dict is None:
+        return LLMUsageInfo()
+
+    usage_dict = _try_get_token_usage(run_dict)
+    provider, model = _get_provider_and_model(run_dict)
+
+    return LLMUsageInfo(provider=provider, model=model, usage=usage_dict)
+
+
+def _try_get_token_usage(run_dict: Dict[str, Any]) -> Optional[UsageDict]:
+    try:
+        usage_metadata = run_dict["outputs"]["generations"][-1][-1]["generation_info"][
+            "usage_metadata"
+        ]
+
+        token_usage = UsageDict(
+            completion_tokens=usage_metadata["candidates_token_count"],
+            prompt_tokens=usage_metadata["prompt_token_count"],
+            total_tokens=usage_metadata["total_token_count"],
+        )
+        token_usage.update(usage_metadata)
+
+        if usage_validator.UsageValidator(token_usage).validate().ok():
+            return cast(UsageDict, token_usage)
+
+        return None
+    except Exception:
+        LOGGER.warning(
+            logging_messages.FAILED_TO_EXTRACT_TOKEN_USAGE_FROM_PRESUMABLY_LANGCHAIN_GOOGLE_LLM_RUN,
+            exc_info=True,
+        )
+        return None
+
+
+def is_google_run(run: "Run") -> bool:
+    try:
+        if run.serialized is None:
+            return False
+
+        provider = run.metadata.get("ls_provider", "")
+        is_google = "google" in provider.lower()
+
+        return is_google
+
+    except Exception:
+        LOGGER.debug(
+            "Failed to check if Run instance is from Google LLM, returning False.",
+            exc_info=True,
+        )
+        return False
+
+
+def _get_provider_and_model(
+    run_dict: Dict[str, Any],
+) -> Tuple[Optional[str], Optional[str]]:
+    """
+    Fetches the provider and model information from a given run dictionary.
+    """
+    provider = None
+    model = None
+
+    if metadata := run_dict["extra"].get("metadata"):
+        provider = metadata.get("ls_provider")
+        model = metadata.get("ls_model_name")
+
+    return provider, model
diff --git a/sdks/python/src/opik/integrations/langchain/openai_run_helpers.py b/sdks/python/src/opik/integrations/langchain/openai_run_helpers.py
index f019264706..4bfd43c854 100644
--- a/sdks/python/src/opik/integrations/langchain/openai_run_helpers.py
+++ b/sdks/python/src/opik/integrations/langchain/openai_run_helpers.py
@@ -1,9 +1,8 @@
-import dataclasses
 import logging
 from typing import Any, Dict, Optional, TYPE_CHECKING, Tuple, cast
 
 from opik import logging_messages
-from opik.types import UsageDict
+from opik.types import LLMUsageInfo, UsageDict
 from opik.validation import usage as usage_validator
 
 if TYPE_CHECKING:
@@ -13,13 +12,6 @@
 LOGGER = logging.getLogger(__name__)
 
 
-@dataclasses.dataclass
-class LLMUsageInfo:
-    provider: Optional[str] = None
-    model: Optional[str] = None
-    token_usage: Optional[UsageDict] = None
-
-
 def get_llm_usage_info(run_dict: Optional[Dict[str, Any]] = None) -> LLMUsageInfo:
     if run_dict is None:
         return LLMUsageInfo()
@@ -27,7 +19,7 @@ def get_llm_usage_info(run_dict: Optional[Dict[str, Any]] = None) -> LLMUsageInf
     usage_dict = _try_get_token_usage(run_dict)
     provider, model = _get_provider_and_model(run_dict)
 
-    return LLMUsageInfo(provider=provider, model=model, token_usage=usage_dict)
+    return LLMUsageInfo(provider=provider, model=model, usage=usage_dict)
 
 
 def _try_get_token_usage(run_dict: Dict[str, Any]) -> Optional[UsageDict]:
diff --git a/sdks/python/src/opik/integrations/langchain/opik_tracer.py b/sdks/python/src/opik/integrations/langchain/opik_tracer.py
index d3945df877..c5b12f81d6 100644
--- a/sdks/python/src/opik/integrations/langchain/opik_tracer.py
+++ b/sdks/python/src/opik/integrations/langchain/opik_tracer.py
@@ -1,13 +1,19 @@
 import logging
 from typing import Any, Dict, List, Literal, Optional, Set, TYPE_CHECKING
-from opik.types import ErrorInfoDict
+
+from opik.types import ErrorInfoDict, LLMUsageInfo
 
 from langchain_core import language_models
 from langchain_core.tracers import BaseTracer
 
 from opik import dict_utils, opik_context
 from opik.api_objects import opik_client, span, trace
-from . import base_llm_patcher, openai_run_helpers, opik_encoder_extension
+from . import (
+    base_llm_patcher,
+    google_run_helpers,
+    openai_run_helpers,
+    opik_encoder_extension,
+)
 from ...api_objects import helpers
 
 if TYPE_CHECKING:
@@ -232,15 +238,16 @@ def _process_end_span(self, run: "Run") -> None:
             # Langchain will call _persist_run for us
         else:
             span_data = self._span_data_map[run.id]
+            usage_info: LLMUsageInfo = LLMUsageInfo()
 
             if openai_run_helpers.is_openai_run(run):
                 usage_info = openai_run_helpers.get_llm_usage_info(run_dict)
-            else:
-                usage_info = openai_run_helpers.get_llm_usage_info()
+            elif google_run_helpers.is_google_run(run):
+                usage_info = google_run_helpers.get_llm_usage_info(run_dict)
 
             span_data.init_end_time().update(
                 output=run_dict["outputs"],
-                usage=usage_info.token_usage,
+                usage=usage_info.usage,
                 provider=usage_info.provider,
                 model=usage_info.model,
             )
diff --git a/sdks/python/src/opik/integrations/llama_index/event_parsing_utils.py b/sdks/python/src/opik/integrations/llama_index/event_parsing_utils.py
index 8edc7e158e..f813f94518 100644
--- a/sdks/python/src/opik/integrations/llama_index/event_parsing_utils.py
+++ b/sdks/python/src/opik/integrations/llama_index/event_parsing_utils.py
@@ -1,18 +1,10 @@
-import dataclasses
 from typing import Any, Dict, Optional
 
 from llama_index.core import Settings
 from llama_index.core.base.llms.types import ChatResponse
 from llama_index.core.callbacks import schema as llama_index_schema
 
-from opik.types import UsageDict
-
-
-@dataclasses.dataclass
-class LLMUsageInfo:
-    provider: Optional[str] = None
-    model: Optional[str] = None
-    usage: Optional[UsageDict] = None
+from opik.types import LLMUsageInfo
 
 
 def get_span_input_from_events(
diff --git a/sdks/python/src/opik/logging_messages.py b/sdks/python/src/opik/logging_messages.py
index 6dba1a9a62..d2a4b7037a 100644
--- a/sdks/python/src/opik/logging_messages.py
+++ b/sdks/python/src/opik/logging_messages.py
@@ -14,6 +14,10 @@
     "Failed to extract token usage from presumably OpenAI LLM langchain run."
 )
 
+FAILED_TO_EXTRACT_TOKEN_USAGE_FROM_PRESUMABLY_LANGCHAIN_GOOGLE_LLM_RUN = (
+    "Failed to extract token usage from presumably Google LLM langchain run."
+)
+
 UNEXPECTED_EXCEPTION_ON_SPAN_CREATION_FOR_TRACKED_FUNCTION = "Unexpected exception happened when tried to create a span for function %s.\nInputs: %s\nError message: %s"
 
 UNEXPECTED_EXCEPTION_ON_SPAN_FINALIZATION_FOR_TRACKED_FUNCTION = "Unexpected exception happened when tried to finalize span.\nOutput: %s\nError message: %s"
diff --git a/sdks/python/src/opik/message_processing/messages.py b/sdks/python/src/opik/message_processing/messages.py
index d8072a6c58..6e38e85a5b 100644
--- a/sdks/python/src/opik/message_processing/messages.py
+++ b/sdks/python/src/opik/message_processing/messages.py
@@ -1,6 +1,6 @@
 import dataclasses
 import datetime
-from typing import Optional, Any, Dict, List
+from typing import Optional, Any, Dict, List, Union
 
 from ..types import UsageDict, SpanType, ErrorInfoDict
 
@@ -61,7 +61,7 @@ class CreateSpanMessage(BaseMessage):
     metadata: Optional[Dict[str, Any]]
     tags: Optional[List[str]]
     type: SpanType
-    usage: Optional[UsageDict]
+    usage: Optional[Union[UsageDict, Dict[str, int]]]
     model: Optional[str]
     provider: Optional[str]
     error_info: Optional[ErrorInfoDict]
@@ -74,7 +74,7 @@ def as_payload_dict(self) -> Dict[str, Any]:
 
 @dataclasses.dataclass
 class UpdateSpanMessage(BaseMessage):
-    "Not recommended to use. Kept only for low level update operations in public API"
+    """Not recommended to use. Kept only for low level update operations in public API"""
 
     span_id: str
     parent_span_id: Optional[str]
@@ -85,7 +85,7 @@ class UpdateSpanMessage(BaseMessage):
     output: Optional[Dict[str, Any]]
     metadata: Optional[Dict[str, Any]]
     tags: Optional[List[str]]
-    usage: Optional[UsageDict]
+    usage: Optional[Union[UsageDict, Dict[str, int]]]
     model: Optional[str]
     provider: Optional[str]
     error_info: Optional[ErrorInfoDict]
diff --git a/sdks/python/src/opik/types.py b/sdks/python/src/opik/types.py
index 18ccd572c0..886fc01a4e 100644
--- a/sdks/python/src/opik/types.py
+++ b/sdks/python/src/opik/types.py
@@ -1,3 +1,4 @@
+import dataclasses
 import sys
 from typing import Literal, Optional
 
@@ -77,3 +78,10 @@ class ErrorInfoDict(TypedDict):
 
     traceback: str
     """Exception traceback"""
+
+
+@dataclasses.dataclass
+class LLMUsageInfo:
+    provider: Optional[str] = None
+    model: Optional[str] = None
+    usage: Optional[UsageDict] = None
diff --git a/sdks/python/tests/library_integration/conftest.py b/sdks/python/tests/library_integration/conftest.py
index 4076a32ce0..1af765e167 100644
--- a/sdks/python/tests/library_integration/conftest.py
+++ b/sdks/python/tests/library_integration/conftest.py
@@ -9,3 +9,20 @@ def ensure_openai_configured():
     if not ("OPENAI_API_KEY" in os.environ and "OPENAI_ORG_ID" in os.environ):
         raise Exception("OpenAI not configured!")
+
+
+@pytest.fixture
+def gcp_e2e_test_credentials():
+    gcp_credentials_file_name = "gcp_credentials.json"
+
+    gcp_credentials = os.environ["GCP_E2E_TEST_CREDENTIALS"]
+
+    with open(gcp_credentials_file_name, mode="wt") as file:
+        file.write(gcp_credentials)
+
+    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = gcp_credentials_file_name
+
+    yield
+
+    del os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
+    os.remove(gcp_credentials_file_name)
diff --git a/sdks/python/tests/library_integration/langchain/requirements.txt b/sdks/python/tests/library_integration/langchain/requirements.txt
index 22a36cda8e..11deca465b 100644
--- a/sdks/python/tests/library_integration/langchain/requirements.txt
+++ b/sdks/python/tests/library_integration/langchain/requirements.txt
@@ -1,2 +1,3 @@
 langchain_community
+langchain_google_vertexai
 langchain_openai
diff --git a/sdks/python/tests/library_integration/langchain/test_langchain.py b/sdks/python/tests/library_integration/langchain/test_langchain.py
index 9534d56d59..4dd34bde68 100644
--- a/sdks/python/tests/library_integration/langchain/test_langchain.py
+++ b/sdks/python/tests/library_integration/langchain/test_langchain.py
@@ -1,3 +1,4 @@
+import langchain_google_vertexai
 import langchain_openai
 import pytest
 from langchain.llms import fake
@@ -34,7 +35,7 @@ def test_langchain__happyflow(
         responses=["I'm sorry, I don't think I'm talented enough to write a synopsis"]
     )
 
-    template = "Given the title of play, right a synopsys for that. Title: {title}."
+    template = "Given the title of play, write a synopsys for that. Title: {title}."
 
     prompt_template = PromptTemplate(input_variables=["title"], template=template)
 
@@ -90,7 +91,7 @@ def test_langchain__happyflow(
                 name="FakeListLLM",
                 input={
                     "prompts": [
-                        "Given the title of play, right a synopsys for that. Title: Documentary about Bigfoot in Paris."
+                        "Given the title of play, write a synopsys for that. Title: Documentary about Bigfoot in Paris."
                     ]
                 },
                 output=ANY_DICT,
@@ -126,11 +127,11 @@
     [
         (
            langchain_openai.OpenAI,
-            "Given the title of play, right a synopsys for that. Title: Documentary about Bigfoot in Paris.",
+            "Given the title of play, write a synopsys for that. Title: Documentary about Bigfoot in Paris.",
         ),
         (
             langchain_openai.ChatOpenAI,
-            "Human: Given the title of play, right a synopsys for that. Title: Documentary about Bigfoot in Paris.",
+            "Human: Given the title of play, write a synopsys for that. Title: Documentary about Bigfoot in Paris.",
         ),
     ],
 )
@@ -139,7 +140,7 @@ def test_langchain__openai_llm_is_used__token_usage_is_logged__happyflow(
 ):
     llm = llm_model(max_tokens=10, name="custom-openai-llm-name")
 
-    template = "Given the title of play, right a synopsys for that. Title: {title}."
+    template = "Given the title of play, write a synopsys for that. Title: {title}."
 
     prompt_template = PromptTemplate(input_variables=["title"], template=template)
 
@@ -210,14 +211,136 @@ def test_langchain__openai_llm_is_used__token_usage_is_logged__happyflow(
     assert_equal(EXPECTED_TRACE_TREE, fake_backend.trace_trees[0])
 
 
-def test_langchain__openai_llm_is_used__error_occured_during_openai_call__error_info_is_logged(
+@pytest.mark.skip
+@pytest.mark.parametrize(
+    "llm_model, expected_input_prompt, metadata_usage",
+    [
+        (
+            langchain_google_vertexai.VertexAI,
+            "Given the title of play, write a synopsys for that. Title: Documentary about Bigfoot in Paris.",
+            {
+                # openai format
+                "completion_tokens": ANY_BUT_NONE,
+                "prompt_tokens": ANY_BUT_NONE,
+                "total_tokens": ANY_BUT_NONE,
+                # VertexAI format
+                # "cached_content_token_count": ANY_BUT_NONE,
+                "candidates_token_count": ANY_BUT_NONE,
+                "prompt_token_count": ANY_BUT_NONE,
+                "total_token_count": ANY_BUT_NONE,
+            },
+        ),
+        (
+            langchain_google_vertexai.ChatVertexAI,
+            "Human: Given the title of play, write a synopsys for that. Title: Documentary about Bigfoot in Paris.",
+            {
+                # openai format
+                "completion_tokens": ANY_BUT_NONE,
+                "prompt_tokens": ANY_BUT_NONE,
+                "total_tokens": ANY_BUT_NONE,
+                # ChatVertexAI format
+                "cached_content_token_count": ANY_BUT_NONE,
+                "candidates_token_count": ANY_BUT_NONE,
+                "prompt_token_count": ANY_BUT_NONE,
+                "total_token_count": ANY_BUT_NONE,
+            },
+        ),
+    ],
+)
+def test_langchain__google_vertexai_llm_is_used__token_usage_is_logged__happyflow(
+    fake_backend,
+    gcp_e2e_test_credentials,
+    llm_model,
+    expected_input_prompt,
+    metadata_usage,
+):
+    llm = llm_model(
+        max_tokens=10,
+        model_name="gemini-1.5-flash",
+        name="custom-google-vertexai-llm-name",
+    )
+
+    template = "Given the title of play, write a synopsys for that. Title: {title}."
+
+    prompt_template = PromptTemplate(input_variables=["title"], template=template)
+
+    synopsis_chain = prompt_template | llm
+    test_prompts = {"title": "Documentary about Bigfoot in Paris"}
+
+    callback = OpikTracer(tags=["tag1", "tag2"], metadata={"a": "b"})
+    synopsis_chain.invoke(input=test_prompts, config={"callbacks": [callback]})
+
+    callback.flush()
+
+    EXPECTED_TRACE_TREE = TraceModel(
+        id=ANY_BUT_NONE,
+        name="RunnableSequence",
+        input={"title": "Documentary about Bigfoot in Paris"},
+        output=ANY_BUT_NONE,
+        tags=["tag1", "tag2"],
+        metadata={"a": "b"},
+        start_time=ANY_BUT_NONE,
+        end_time=ANY_BUT_NONE,
+        spans=[
+            SpanModel(
+                id=ANY_BUT_NONE,
+                name="RunnableSequence",
+                input={"title": "Documentary about Bigfoot in Paris"},
+                output=ANY_BUT_NONE,
+                tags=["tag1", "tag2"],
+                metadata={"a": "b"},
+                start_time=ANY_BUT_NONE,
+                end_time=ANY_BUT_NONE,
+                spans=[
+                    SpanModel(
+                        id=ANY_BUT_NONE,
+                        type="general",
+                        name="PromptTemplate",
+                        input={"title": "Documentary about Bigfoot in Paris"},
+                        output={"output": ANY_BUT_NONE},
+                        metadata={},
+                        start_time=ANY_BUT_NONE,
+                        end_time=ANY_BUT_NONE,
+                        spans=[],
+                    ),
+                    SpanModel(
+                        id=ANY_BUT_NONE,
+                        type="llm",
+                        name="custom-google-vertexai-llm-name",
+                        input={"prompts": [expected_input_prompt]},
+                        output=ANY_BUT_NONE,
+                        metadata={
+                            "batch_size": ANY_BUT_NONE,
+                            "invocation_params": ANY_DICT,
+                            "metadata": ANY_DICT,
+                            "options": ANY_DICT,
+                            "usage": metadata_usage,
+                        },
+                        start_time=ANY_BUT_NONE,
+                        end_time=ANY_BUT_NONE,
+                        usage=metadata_usage,
+                        spans=[],
+                        provider="google_vertexai",
+                        model=ANY_STRING(startswith="gemini-1.5-flash"),
+                    ),
+                ],
+            )
+        ],
+    )
+
+    assert len(fake_backend.trace_trees) == 1
+    assert len(callback.created_traces()) == 1
+    assert_equal(EXPECTED_TRACE_TREE, fake_backend.trace_trees[0])
+
+
+def test_langchain__openai_llm_is_used__error_occurred_during_openai_call__error_info_is_logged(
     fake_backend,
 ):
     llm = langchain_openai.OpenAI(
         max_tokens=10, name="custom-openai-llm-name", api_key="incorrect-api-key"
     )
 
-    template = "Given the title of play, right a synopsys for that. Title: {title}."
+    template = "Given the title of play, write a synopsys for that. Title: {title}."
 
     prompt_template = PromptTemplate(input_variables=["title"], template=template)
 
@@ -275,7 +398,7 @@ def test_langchain__openai_llm_is_used__error_occured_during_openai_call__error_
             name="custom-openai-llm-name",
             input={
                 "prompts": [
-                    "Given the title of play, right a synopsys for that. Title: Documentary about Bigfoot in Paris."
+                    "Given the title of play, write a synopsys for that. Title: Documentary about Bigfoot in Paris."
                 ]
             },
             output=None,
@@ -319,7 +442,7 @@ def f(x):
         ]
     )
 
-    template = "Given the title of play, right a synopsys for that. Title: {title}."
+    template = "Given the title of play, write a synopsys for that. Title: {title}."
 
     prompt_template = PromptTemplate(input_variables=["title"], template=template)
 
@@ -382,7 +505,7 @@ def f(x):
                 name="FakeListLLM",
                 input={
                     "prompts": [
-                        "Given the title of play, right a synopsys for that. Title: Documentary about Bigfoot in Paris."
+                        "Given the title of play, write a synopsys for that. Title: Documentary about Bigfoot in Paris."
                     ]
                 },
                 output=ANY_DICT,
@@ -427,7 +550,7 @@ def f():
         ]
     )
 
-    template = "Given the title of play, right a synopsys for that. Title: {title}."
+    template = "Given the title of play, write a synopsys for that. Title: {title}."
 
     prompt_template = PromptTemplate(input_variables=["title"], template=template)
 
@@ -493,7 +616,7 @@ def f():
                 name="FakeListLLM",
                 input={
                     "prompts": [
-                        "Given the title of play, right a synopsys for that. Title: Documentary about Bigfoot in Paris."
+                        "Given the title of play, write a synopsys for that. Title: Documentary about Bigfoot in Paris."
                     ]
                 },
                 output=ANY_DICT,
@@ -536,7 +659,7 @@ def f():
         ]
     )
 
-    template = "Given the title of play, right a synopsys for that. Title: {title}."
+    template = "Given the title of play, write a synopsys for that. Title: {title}."
 
     prompt_template = PromptTemplate(input_variables=["title"], template=template)
 
@@ -599,7 +722,7 @@ def f():
                 name="FakeListLLM",
                 input={
                     "prompts": [
-                        "Given the title of play, right a synopsys for that. Title: Documentary about Bigfoot in Paris."
+                        "Given the title of play, write a synopsys for that. Title: Documentary about Bigfoot in Paris."
                    ]
                },
                output=ANY_DICT,
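
Note (not part of the patch): the skipped Vertex AI test above mirrors how the new google_run_helpers code path is reached in practice. A minimal usage sketch follows, assuming GCP credentials are already configured (for example via GOOGLE_APPLICATION_CREDENTIALS) and that the gemini-1.5-flash model used in the test is available in your project; the PromptTemplate import path may differ with your LangChain version.

from langchain_core.prompts import PromptTemplate
from langchain_google_vertexai import ChatVertexAI

from opik.integrations.langchain import OpikTracer

# Build a small chain backed by a Vertex AI chat model, as in the test above.
llm = ChatVertexAI(model_name="gemini-1.5-flash", max_tokens=10)
prompt = PromptTemplate(
    input_variables=["title"],
    template="Given the title of play, write a synopsys for that. Title: {title}.",
)
chain = prompt | llm

# OpikTracer detects the Google run via google_run_helpers.is_google_run()
# and logs both the OpenAI-style and Vertex AI-style token counts on the LLM span.
tracer = OpikTracer(tags=["vertexai-example"])
chain.invoke({"title": "Documentary about Bigfoot in Paris"}, config={"callbacks": [tracer]})
tracer.flush()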