heroku additions - initial test #7678

Draft · wants to merge 2 commits into base: main
6 changes: 6 additions & 0 deletions litellm/__init__.py
@@ -468,6 +468,7 @@ def identify(event_details):
gemini_models: List = []
xai_models: List = []
deepseek_models: List = []
heroku_models: List = []
azure_ai_models: List = []
voyage_models: List = []
databricks_models: List = []
@@ -568,6 +569,8 @@ def add_known_models():
xai_models.append(key)
elif value.get("litellm_provider") == "deepseek":
deepseek_models.append(key)
elif value.get("litellm_provider") == "heroku":
heroku_models.append(key)
elif value.get("litellm_provider") == "azure_ai":
azure_ai_models.append(key)
elif value.get("litellm_provider") == "voyage":
@@ -845,6 +848,7 @@ def add_known_models():
+ text_completion_codestral_models
+ xai_models
+ deepseek_models
+ heroku_models
+ azure_ai_models
+ voyage_models
+ databricks_models
@@ -895,6 +899,7 @@ def add_known_models():
"text-completion-codestral": text_completion_codestral_models,
"xai": xai_models,
"deepseek": deepseek_models,
"heroku": heroku_models,
"mistral": mistral_chat_models,
"azure_ai": azure_ai_models,
"voyage": voyage_models,
@@ -1169,6 +1174,7 @@ def add_known_models():
from .llms.azure.chat.gpt_transformation import AzureOpenAIConfig
from .llms.azure.completion.transformation import AzureOpenAITextConfig
from .llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig
from .llms.heroku.chat.transformation import HerokuChatConfig
from .llms.litellm_proxy.chat.transformation import LiteLLMProxyChatConfig
from .llms.vllm.completion.transformation import VLLMConfig
from .llms.deepseek.chat.transformation import DeepSeekChatConfig
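For add_known_models() above to pick anything up, the model cost map (model_prices_and_context_window.json, loaded into litellm.model_cost) would presumably need entries whose litellm_provider is "heroku". A hypothetical entry, written as the Python dict the loop iterates over (the model name and limits are placeholders, not part of this PR):

# Hypothetical litellm.model_cost entry; key and values are illustrative only
model_cost_entry = {
    "heroku/claude-3-5-sonnet": {
        "max_tokens": 8192,
        "litellm_provider": "heroku",  # the field add_known_models() matches on
        "mode": "chat",
    }
}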
1 change: 1 addition & 0 deletions litellm/constants.py
@@ -65,6 +65,7 @@
"custom",
"litellm_proxy",
"hosted_vllm",
"heroku"
"lm_studio",
"galadriel",
]
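Because "heroku" is added to the OpenAI-compatible provider list above, requests should route through the OpenAI-like chat path. A minimal usage sketch, assuming the HEROKU_API_BASE / HEROKU_API_KEY variables that the config later in this PR reads (the endpoint URL and model slug are hypothetical):

import os

import litellm

# Env var names match the get_secret_str() calls in HerokuChatConfig below
os.environ["HEROKU_API_BASE"] = "https://us.inference.heroku.com/v1"  # hypothetical endpoint
os.environ["HEROKU_API_KEY"] = "inf-xxxxxxxx"  # placeholder key

# The "heroku/" prefix selects the provider added in this PR
response = litellm.completion(
    model="heroku/claude-3-5-sonnet",  # hypothetical model slug
    messages=[{"role": "user", "content": "Say hello from Heroku Inference"}],
)
print(response.choices[0].message.content)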
2 changes: 1 addition & 1 deletion litellm/llms/bedrock/chat/invoke_handler.py
@@ -1,5 +1,5 @@
"""
Manages calling Bedrock's `/converse` API + `/invoke` API
Manages calling Bedrock's `/converse` API + `/invoke` API
"""

import copy
@@ -1,5 +1,5 @@
"""
Transformation logic from OpenAI /v1/embeddings format to Bedrock Amazon Titan G1 /invoke format.
Transformation logic from OpenAI /v1/embeddings format to Bedrock Amazon Titan G1 /invoke format.

Why separate file? Make it easy to see how transformation works

176 changes: 176 additions & 0 deletions litellm/llms/heroku/chat/transformation.py
@@ -0,0 +1,176 @@

"""
Translates from OpenAI's `/v1/chat/completions` to Heroku's `/v1/chat/completions`
"""
from typing import Any, List, Optional, Tuple, Union

import httpx

from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.secret_managers.main import get_secret_str
from litellm.types.utils import ModelResponse

from ...openai.chat.gpt_transformation import OpenAIGPTConfig
from ...openai_like.chat.transformation import OpenAILikeChatConfig


class HerokuError(BaseLLMException):
Contributor:
can we move this to a common_utils.py outside of the /chat

make it easier for someone to click on this file and directly see the transformation logic
(see the sketch after this class)

    def __init__(
        self,
        status_code: int,
        message: str,
        request: Optional[httpx.Request] = None,
        response: Optional[httpx.Response] = None,
        headers: Optional[Union[dict, httpx.Headers]] = None,
    ):
        self.status_code = status_code
        self.message = message
        self.headers = headers
        if request:
            self.request = request
        else:
            self.request = httpx.Request(method="POST", url=get_secret_str("HEROKU_API_BASE"))
        if response:
            self.response = response
        else:
            self.response = httpx.Response(
                status_code=status_code, request=self.request
            )
        super().__init__(
            status_code=status_code,
            message=self.message,
            headers=self.headers,
            request=self.request,
            response=self.response,
        )

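A sketch of the relocation suggested in the comment above: the error class moves to a sibling common_utils.py so this file only carries transformation logic. The path and the trimmed constructor are assumptions for illustration, not code from this PR; transformation.py would then import it with "from ..common_utils import HerokuError".

# litellm/llms/heroku/common_utils.py (hypothetical location, per the review comment)
from typing import Optional, Union

import httpx

from litellm.llms.base_llm.chat.transformation import BaseLLMException


class HerokuError(BaseLLMException):
    """Heroku-specific error, kept outside /chat so transformation.py stays focused."""

    def __init__(
        self,
        status_code: int,
        message: str,
        request: Optional[httpx.Request] = None,
        response: Optional[httpx.Response] = None,
        headers: Optional[Union[dict, httpx.Headers]] = None,
    ):
        super().__init__(
            status_code=status_code,
            message=message,
            headers=headers,
            request=request,
            response=response,
        )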

class HerokuChatConfig(OpenAILikeChatConfig):
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        return super().map_openai_params(
            non_default_params, optional_params, model, drop_params
        )

    def _get_openai_compatible_provider_info(
        self, api_base: Optional[str], api_key: Optional[str]
    ) -> Tuple[Optional[str], Optional[str]]:
        api_base = api_base or get_secret_str("HEROKU_API_BASE")  # type: ignore
        dynamic_api_key = (
            api_key or get_secret_str("HEROKU_API_KEY") or "fake-api-key"
        )
        return api_base, dynamic_api_key


    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages,
        optional_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ):
        return {
            "Authorization": f"Bearer {api_key or get_secret_str('HEROKU_API_KEY')}",
            "content-type": "application/json",
            **headers,
        }

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
        return HerokuError(
            status_code=status_code,
            message=error_message,
            headers=headers,
        )

    def _handle_streaming_response(
        self,
        raw_response: httpx.Response,
        model_response: ModelResponse,
        logging_obj: LiteLLMLoggingObj,
        messages: List,
    ) -> ModelResponse:
        """
        Handle streamed responses from the API.
        """
        streamed_content = []

        # Assuming `raw_response` is an httpx.StreamingResponse object
        for chunk in raw_response.iter_text():
            streamed_content.append(chunk)
            logging_obj.post_call(
Contributor:
let's not call post_call in here - it'll get emitted on every chunk

                input=messages,
                api_key="",
                original_response={"chunk": chunk},
            )

        # Join streamed content and set it as the final response
        full_response = "".join(streamed_content)
        model_response.content = full_response

        return model_response

    def transform_response(
        self,
        model: str,
        raw_response: httpx.Response,
        model_response: ModelResponse,
        logging_obj: LiteLLMLoggingObj,
        request_data: dict,
        messages: List,
        optional_params: dict,
        litellm_params: dict,
        encoding: Any,
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> ModelResponse:
        """
        Transform the response from the API.

        Handles both standard and streamed responses.
Contributor:
no don't handle both in here. we have a separate event hook for streaming -
def get_model_response_iterator(

"""
# Check if streaming is requested
stream = optional_params.get("stream", False)
print_verbose = optional_params.get("print_verbose", False)

if stream:
Contributor:
streaming logic won't enter here - it'll go to 'get_model_response_iterator'
def get_model_response_iterator(

            # Handle streaming logic separately if needed
            return self._handle_streaming_response(
                raw_response=raw_response,
                model_response=model_response,
                logging_obj=logging_obj,
                messages=messages,
            )

        # Call the base method for standard (non-streaming) responses
        return super()._transform_response(
Contributor:
if no translation is required, this function doesn't need to be implemented (i see you inherit from openailike config, which should take care of this)
(a trimmed-down config along these lines is sketched after this file's diff)

            model=model,
            response=raw_response,
            model_response=model_response,
            stream=stream,
            logging_obj=logging_obj,
            optional_params=optional_params,
            api_key=api_key,
            data=request_data,
            messages=messages,
            print_verbose=print_verbose,
            encoding=encoding,
            json_mode=json_mode,
            custom_llm_provider="heroku",
            base_model=model,
        )
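Taking the review comments together, HerokuChatConfig could presumably shrink to the sketch below: no transform_response override and no hand-rolled streaming, since OpenAILikeChatConfig already implements the OpenAI-compatible request/response handling and streaming goes through the base class's get_model_response_iterator hook. This is a sketch under those assumptions, not the final PR code:

from typing import Optional, Tuple

from litellm.secret_managers.main import get_secret_str

from ...openai_like.chat.transformation import OpenAILikeChatConfig


class HerokuChatConfig(OpenAILikeChatConfig):
    """Heroku's chat API is OpenAI-compatible, so the base class handles the translation."""

    def _get_openai_compatible_provider_info(
        self, api_base: Optional[str], api_key: Optional[str]
    ) -> Tuple[Optional[str], Optional[str]]:
        # Only the Heroku-specific endpoint/credential resolution remains here
        api_base = api_base or get_secret_str("HEROKU_API_BASE")
        dynamic_api_key = api_key or get_secret_str("HEROKU_API_KEY") or "fake-api-key"
        return api_base, dynamic_api_key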
2 changes: 1 addition & 1 deletion litellm/llms/openai/chat/gpt_transformation.py
@@ -1,5 +1,5 @@
"""
Support for gpt model family
Support for gpt model family
"""

from typing import TYPE_CHECKING, Any, List, Optional, Union, cast