From f625e3518cdb98ba6ab2825035aac56ada17ced0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20K=C3=B6hnecke?= Date: Fri, 24 Jan 2025 10:44:54 +0100 Subject: [PATCH 1/4] feat: convert project_id to string, no matter what the service returns --- .../connectors/studio/studio.py | 30 +++++++++++-------- tests/evaluation/benchmark/test_benchmark.py | 2 +- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/intelligence_layer/connectors/studio/studio.py b/src/intelligence_layer/connectors/studio/studio.py index a7ed68ce..2e8add6e 100644 --- a/src/intelligence_layer/connectors/studio/studio.py +++ b/src/intelligence_layer/connectors/studio/studio.py @@ -9,7 +9,7 @@ from uuid import uuid4 import requests -from pydantic import BaseModel, Field, RootModel +from pydantic import BaseModel, Field, RootModel, field_validator from requests.exceptions import ConnectionError, MissingSchema from intelligence_layer.connectors import JsonSerializable @@ -96,7 +96,7 @@ class PostBenchmarkRequest(BaseModel): class GetBenchmarkResponse(BaseModel): id: str - project_id: int + project_id: str dataset_id: str name: str description: str | None @@ -109,6 +109,10 @@ class GetBenchmarkResponse(BaseModel): created_by: str | None updated_by: str | None + @field_validator("project_id", mode="before") + def transform_id_to_str(cls, value) -> str: + return str(value) + class PostBenchmarkExecution(BaseModel): name: str @@ -226,7 +230,7 @@ def __init__( self.url = StudioClient.get_url(studio_url) self._check_connection() self._project_name = project - self._project_id: int | None = None + self._project_id: str | None = None if create_project: project_id = self._get_project(self._project_name) @@ -256,7 +260,7 @@ def _check_connection(self) -> None: ) from None @property - def project_id(self) -> int: + def project_id(self) -> str: if self._project_id is None: project_id = self._get_project(self._project_name) if project_id is None: @@ -266,7 +270,7 @@ def project_id(self) -> int: self._project_id = project_id return self._project_id - def _get_project(self, project: str) -> int | None: + def _get_project(self, project_name: str) -> str | None: url = urljoin(self.url, "/api/projects") response = requests.get( url, @@ -276,24 +280,24 @@ def _get_project(self, project: str) -> int | None: all_projects = response.json() try: project_of_interest = next( - proj for proj in all_projects if proj["name"] == project + proj for proj in all_projects if proj["name"] == project_name ) - return int(project_of_interest["id"]) + return str(project_of_interest["id"]) except StopIteration: return None def create_project( self, - project: str, + project_name: str, description: Optional[str] = None, reuse_existing: bool = False, - ) -> int: + ) -> str: """Creates a project in Studio. Projects are uniquely identified by the user provided name. Args: - project: User provided name of the project. + project_name: User provided name of the project. description: Description explaining the usage of the project. Defaults to None. reuse_existing: Reuse project with specified name if already existing. Defaults to False. @@ -302,7 +306,7 @@ def create_project( The ID of the newly created project. """ url = urljoin(self.url, "/api/projects") - data = StudioProject(name=project, description=description) + data = StudioProject(name=project_name, description=description) response = requests.post( url, data=data.model_dump_json(), @@ -311,7 +315,7 @@ def create_project( match response.status_code: case 409: if reuse_existing: - fetched_project = self._get_project(project) + fetched_project = self._get_project(project_name) assert ( fetched_project is not None ), "Project already exists but not allowed to be used." @@ -319,7 +323,7 @@ def create_project( raise ValueError("Project already exists") case _: response.raise_for_status() - return int(response.text) + return response.text def submit_trace(self, data: Sequence[ExportedSpan]) -> str: """Sends the provided spans to Studio as a singular trace. diff --git a/tests/evaluation/benchmark/test_benchmark.py b/tests/evaluation/benchmark/test_benchmark.py index 40b15159..4fc9968d 100644 --- a/tests/evaluation/benchmark/test_benchmark.py +++ b/tests/evaluation/benchmark/test_benchmark.py @@ -42,7 +42,7 @@ def get_benchmark_response(datatset_id: str) -> GetBenchmarkResponse: aggregation_identifier = create_aggregation_logic_identifier(aggregation_logic) return GetBenchmarkResponse( id="id", - project_id=0, + project_id=str(uuid4()), dataset_id=datatset_id, name="name", description="description", From d7b180d0345b9c3676bd01a6be98a6e01a44c887 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20K=C3=B6hnecke?= Date: Tue, 28 Jan 2025 14:36:09 +0100 Subject: [PATCH 2/4] docs: add changelog entry --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bcfb778..a7108ebd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,12 @@ ### Deprecations ... +### Breaking Changes + - `StudioClient` now handles project_id as a string instead of an integer. This is only relevant when you handle project ids (not names) manually. + - `InMemoryDatasetRepository` now returns the exact types given by users when retrieving `Example`. Previously, it disregarded the types it was given and returned what was saved. + - This is in line with how the other repositories work. + - `EloQaEvaluationLogic` now has an expected output type of `None` instead of `SingleChunkQaOutput`. The information was unused. + - If you have pipelines that define data to be processed by this logic OR if you subclass from this specific logic, you may need to adapt it. ### Breaking Changes - `InMemoryDatasetRepository`, `InMemoryRunRepository`, `InMemoryEvaluationRepository`, and `InMemoryAggregationRepository` now either return the exact types given by users when retrieving example-related data or fail. Specifically, this means that passing the wrong type when retrieving data will now fail with a `ValidationError`. Previously, the repositories disregarded the types they were given and returned whatever object was saved. From d62ccbfdb115ff28d758f3273287d2dfd0fa8b81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20K=C3=B6hnecke?= Date: Tue, 28 Jan 2025 14:44:41 +0100 Subject: [PATCH 3/4] fix: revert changing the create_project signature --- src/intelligence_layer/connectors/studio/studio.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/intelligence_layer/connectors/studio/studio.py b/src/intelligence_layer/connectors/studio/studio.py index 2e8add6e..7dbc19aa 100644 --- a/src/intelligence_layer/connectors/studio/studio.py +++ b/src/intelligence_layer/connectors/studio/studio.py @@ -288,7 +288,7 @@ def _get_project(self, project_name: str) -> str | None: def create_project( self, - project_name: str, + project: str, description: Optional[str] = None, reuse_existing: bool = False, ) -> str: @@ -297,7 +297,7 @@ def create_project( Projects are uniquely identified by the user provided name. Args: - project_name: User provided name of the project. + project: User provided name of the project. description: Description explaining the usage of the project. Defaults to None. reuse_existing: Reuse project with specified name if already existing. Defaults to False. @@ -306,7 +306,7 @@ def create_project( The ID of the newly created project. """ url = urljoin(self.url, "/api/projects") - data = StudioProject(name=project_name, description=description) + data = StudioProject(name=project, description=description) response = requests.post( url, data=data.model_dump_json(), @@ -315,7 +315,7 @@ def create_project( match response.status_code: case 409: if reuse_existing: - fetched_project = self._get_project(project_name) + fetched_project = self._get_project(project) assert ( fetched_project is not None ), "Project already exists but not allowed to be used." From dd3e41bbd032b94bd363e248103d3ac76f542709 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20K=C3=B6hnecke?= Date: Thu, 30 Jan 2025 18:01:49 +0100 Subject: [PATCH 4/4] fix: only convert project ids to string if they are int or uuid --- src/intelligence_layer/connectors/studio/studio.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/intelligence_layer/connectors/studio/studio.py b/src/intelligence_layer/connectors/studio/studio.py index 7dbc19aa..049af095 100644 --- a/src/intelligence_layer/connectors/studio/studio.py +++ b/src/intelligence_layer/connectors/studio/studio.py @@ -6,7 +6,7 @@ from datetime import datetime from typing import Any, Generic, Optional, TypeVar from urllib.parse import urljoin -from uuid import uuid4 +from uuid import UUID, uuid4 import requests from pydantic import BaseModel, Field, RootModel, field_validator @@ -111,7 +111,9 @@ class GetBenchmarkResponse(BaseModel): @field_validator("project_id", mode="before") def transform_id_to_str(cls, value) -> str: - return str(value) + if type(value) is int or type(value) is UUID: + return str(value) + return value class PostBenchmarkExecution(BaseModel):