Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: convert project_id to string #1226

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@

### Deprecations
...
### Breaking Changes
- `StudioClient` now handles `project_id` as a string instead of an integer. This is only relevant if you handle project IDs (not names) manually.
- `InMemoryDatasetRepository` now returns the exact types given by users when retrieving `Example`. Previously, it disregarded the types it was given and returned what was saved.
- This is in line with how the other repositories work.
- `EloQaEvaluationLogic` now has an expected output type of `None` instead of `SingleChunkQaOutput`. The information was unused.
- If you have pipelines that define data to be processed by this logic OR if you subclass from this specific logic, you may need to adapt it.

### Breaking Changes
- `InMemoryDatasetRepository`, `InMemoryRunRepository`, `InMemoryEvaluationRepository`, and `InMemoryAggregationRepository` now either return the exact types given by users when retrieving example-related data or fail. Specifically, this means that passing the wrong type when retrieving data will now fail with a `ValidationError`. Previously, the repositories disregarded the types they were given and returned whatever object was saved.
Expand Down
26 changes: 16 additions & 10 deletions src/intelligence_layer/connectors/studio/studio.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
from datetime import datetime
from typing import Any, Generic, Optional, TypeVar
from urllib.parse import urljoin
from uuid import uuid4
from uuid import UUID, uuid4

import requests
from pydantic import BaseModel, Field, RootModel
from pydantic import BaseModel, Field, RootModel, field_validator
from requests.exceptions import ConnectionError, MissingSchema

from intelligence_layer.connectors import JsonSerializable
Expand Down Expand Up @@ -96,7 +96,7 @@ class PostBenchmarkRequest(BaseModel):

class GetBenchmarkResponse(BaseModel):
id: str
project_id: int
project_id: str
dataset_id: str
name: str
description: str | None
Expand All @@ -109,6 +109,12 @@ class GetBenchmarkResponse(BaseModel):
created_by: str | None
updated_by: str | None

@field_validator("project_id", mode="before")
@classmethod
def transform_id_to_str(cls, value: Any) -> Any:
    """Coerce an integer or UUID project id to ``str`` before validation.

    Registered with ``mode="before"``, so it receives the raw input value.
    Anything that is not an ``int`` or ``UUID`` is returned unchanged and
    left to the regular validation of the ``str`` field.

    Args:
        value: Raw value supplied for ``project_id``.

    Returns:
        ``str(value)`` for ``int``/``UUID`` inputs, otherwise ``value`` as is.
    """
    # isinstance is the idiomatic type check and also covers subclasses.
    # bool is an int subclass, so it is excluded explicitly: the previous
    # exact-type check never stringified booleans either.
    if isinstance(value, (int, UUID)) and not isinstance(value, bool):
        return str(value)
    return value


class PostBenchmarkExecution(BaseModel):
name: str
Expand Down Expand Up @@ -226,7 +232,7 @@ def __init__(
self.url = StudioClient.get_url(studio_url)
self._check_connection()
self._project_name = project
self._project_id: int | None = None
self._project_id: str | None = None

if create_project:
project_id = self._get_project(self._project_name)
Expand Down Expand Up @@ -256,7 +262,7 @@ def _check_connection(self) -> None:
) from None

@property
def project_id(self) -> int:
def project_id(self) -> str:
if self._project_id is None:
project_id = self._get_project(self._project_name)
if project_id is None:
Expand All @@ -266,7 +272,7 @@ def project_id(self) -> int:
self._project_id = project_id
return self._project_id

def _get_project(self, project: str) -> int | None:
def _get_project(self, project_name: str) -> str | None:
url = urljoin(self.url, "/api/projects")
response = requests.get(
url,
Expand All @@ -276,9 +282,9 @@ def _get_project(self, project: str) -> int | None:
all_projects = response.json()
try:
project_of_interest = next(
proj for proj in all_projects if proj["name"] == project
proj for proj in all_projects if proj["name"] == project_name
)
return int(project_of_interest["id"])
return str(project_of_interest["id"])
except StopIteration:
return None

Expand All @@ -287,7 +293,7 @@ def create_project(
project: str,
description: Optional[str] = None,
reuse_existing: bool = False,
) -> int:
) -> str:
"""Creates a project in Studio.

Projects are uniquely identified by the user provided name.
Expand Down Expand Up @@ -319,7 +325,7 @@ def create_project(
raise ValueError("Project already exists")
case _:
response.raise_for_status()
return int(response.text)
return response.text

def submit_trace(self, data: Sequence[ExportedSpan]) -> str:
"""Sends the provided spans to Studio as a singular trace.
Expand Down
2 changes: 1 addition & 1 deletion tests/evaluation/benchmark/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def get_benchmark_response(datatset_id: str) -> GetBenchmarkResponse:
aggregation_identifier = create_aggregation_logic_identifier(aggregation_logic)
return GetBenchmarkResponse(
id="id",
project_id=0,
project_id=str(uuid4()),
dataset_id=datatset_id,
name="name",
description="description",
Expand Down