Skip to content

Commit

Permalink
(feat) - provider budget improvements - ensure provider budgets work …
Browse files Browse the repository at this point in the history
…with multiple proxy instances + improve latency to ~90ms (#6886)

* use 1 file for duration_in_seconds

* add to readme.md

* re use duration_in_seconds

* fix importing _extract_from_regex, get_last_day_of_month

* fix import

* update provider budget routing

* fix - remove dup test

* add support for using in multi instance environments

* test_in_memory_redis_sync_e2e

* test_in_memory_redis_sync_e2e

* fix test_in_memory_redis_sync_e2e

* fix code quality check

* fix test provider budgets

* working provider budget tests

* add fixture for provider budget routing

* fix router testing for provider budgets

* add comments on provider budget routing

* use RedisPipelineIncrementOperation

* add redis async_increment_pipeline

* use redis async_increment_pipeline

* use lower value for testing

* use redis async_increment_pipeline

* use consistent key name for increment op

* add handling for budget windows

* fix typing async_increment_pipeline

* fix set attr

* add clear doc strings

* unit testing for provider budgets

* test_redis_increment_pipeline
  • Loading branch information
ishaan-jaff authored Nov 25, 2024
1 parent 34bfebe commit c73ce95
Show file tree
Hide file tree
Showing 7 changed files with 633 additions and 47 deletions.
63 changes: 45 additions & 18 deletions docs/my-website/docs/proxy/provider_budget_routing.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,27 @@ model_list:
api_key: os.environ/OPENAI_API_KEY

router_settings:
redis_host: <your-redis-host>
redis_password: <your-redis-password>
redis_port: <your-redis-port>
provider_budget_config:
openai:
budget_limit: 0.000000000001 # float of $ value budget for time period
time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
azure:
budget_limit: 100
time_period: 1d
anthropic:
budget_limit: 100
time_period: 10d
vertex_ai:
budget_limit: 100
time_period: 12d
gemini:
budget_limit: 100
time_period: 12d
openai:
budget_limit: 0.000000000001 # float of $ value budget for time period
time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
azure:
budget_limit: 100
time_period: 1d
anthropic:
budget_limit: 100
time_period: 10d
vertex_ai:
budget_limit: 100
time_period: 12d
gemini:
budget_limit: 100
time_period: 12d

# OPTIONAL: Set Redis host, port, and password if running multiple instances of LiteLLM
redis_host: os.environ/REDIS_HOST
redis_port: os.environ/REDIS_PORT
redis_password: os.environ/REDIS_PASSWORD

general_settings:
master_key: sk-1234
Expand Down Expand Up @@ -132,6 +134,31 @@ This metric indicates the remaining budget for a provider in dollars (USD)
litellm_provider_remaining_budget_metric{api_provider="openai"} 10
```
## Multi-instance setup
If you are using a multi-instance setup, you will need to set the Redis host, port, and password in the `proxy_config.yaml` file. Redis is used to sync the spend across LiteLLM instances.
```yaml
model_list:
- model_name: gpt-3.5-turbo
litellm_params:
model: openai/gpt-3.5-turbo
api_key: os.environ/OPENAI_API_KEY
router_settings:
provider_budget_config:
openai:
budget_limit: 0.000000000001 # float of $ value budget for time period
time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
# 👇 Add this: Set Redis host, port, and password if running multiple instances of LiteLLM
redis_host: os.environ/REDIS_HOST
redis_port: os.environ/REDIS_PORT
redis_password: os.environ/REDIS_PASSWORD
general_settings:
master_key: sk-1234
```

## Spec for provider_budget_config

Expand Down
90 changes: 90 additions & 0 deletions litellm/caching/redis_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
from litellm.types.caching import RedisPipelineIncrementOperation
from litellm.types.services import ServiceLoggerPayload, ServiceTypes
from litellm.types.utils import all_litellm_params

Expand Down Expand Up @@ -890,3 +891,92 @@ async def async_delete_cache(self, key: str):

# Synchronously delete `key` from Redis by delegating to `self.redis_client.delete`.
# NOTE(review): the key is forwarded as-is; no namespace fix is applied in this
# method, unlike the pipeline increment helper below — confirm that is intended.
def delete_cache(self, key):
self.redis_client.delete(key)

async def _pipeline_increment_helper(
    self,
    pipe: pipeline,
    increment_list: List[RedisPipelineIncrementOperation],
) -> Optional[List[float]]:
    """
    Queue every increment operation on `pipe`, then execute the pipeline once.

    Args:
        pipe: Pipeline object the commands are queued on and executed through.
        increment_list: Operations to apply. Each one is read for its
            `key`, `increment_value` and `ttl` entries.

    Returns:
        Whatever `pipe.execute()` returns.
        NOTE(review): an expire command is queued alongside each increment
        that has a TTL, so the list presumably also carries the expire
        results, not only floats - confirm against redis-py.
    """
    for op in increment_list:
        # Keys are namespaced before they are sent to Redis.
        namespaced_key = self.check_and_fix_namespace(key=op["key"])
        amount = op["increment_value"]
        ttl = op["ttl"]
        print_verbose(
            f"Increment ASYNC Redis Cache PIPELINE: key: {namespaced_key}\nValue {amount}\nttl={ttl}"
        )
        pipe.incrbyfloat(namespaced_key, amount)
        if ttl is None:
            # No expiry requested for this key: only the increment is queued.
            continue
        # `ttl` is interpreted as a number of seconds.
        pipe.expire(namespaced_key, timedelta(seconds=ttl))

    # Nothing is sent until the whole batch is executed here.
    pipeline_results = await pipe.execute()
    print_verbose(
        f"Increment ASYNC Redis Cache PIPELINE: results: {pipeline_results}"
    )
    return pipeline_results

async def async_increment_pipeline(
    self, increment_list: List[RedisPipelineIncrementOperation], **kwargs
) -> Optional[List[float]]:
    """
    Use Redis Pipelines for bulk increment operations

    Args:
        increment_list: List of RedisPipelineIncrementOperation dicts containing:
            - key: str - cache key to increment
            - increment_value: float - amount to increment the key by
            - ttl: Optional[int] - expiry in seconds; no expiry is set when None
        **kwargs: Only used to look up the parent OTEL span for service logging.

    Returns:
        None when `increment_list` is empty (no Redis request is made),
        otherwise the result list produced by executing the pipeline.
        NOTE(review): the pipeline queues an expire command next to each
        increment that has a TTL, so the list presumably contains the expire
        results as well - confirm against redis-py.

    Raises:
        Exception: any error raised while running the pipeline is reported to
            the service failure hook, logged, and re-raised unchanged.
    """
    # don't waste a network request if there's nothing to increment
    if not increment_list:
        return None

    from redis.asyncio import Redis

    _redis_client: Redis = self.init_async_client()  # type: ignore
    start_time = time.time()

    print_verbose(
        f"Increment Async Redis Cache Pipeline: increment list: {increment_list}"
    )

    try:
        async with _redis_client as redis_client:
            # transaction=True: the queued commands are sent as one transaction
            async with redis_client.pipeline(transaction=True) as pipe:
                results = await self._pipeline_increment_helper(
                    pipe, increment_list
                )

        print_verbose(f"pipeline increment results: {results}")

        ## LOGGING ##
        end_time = time.time()
        _duration = end_time - start_time
        # Fire-and-forget so service logging does not delay the caller.
        asyncio.create_task(
            self.service_logger_obj.async_service_success_hook(
                service=ServiceTypes.REDIS,
                duration=_duration,
                call_type="async_increment_pipeline",
                start_time=start_time,
                end_time=end_time,
                parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
            )
        )
        return results
    except Exception as e:
        ## LOGGING ##
        end_time = time.time()
        _duration = end_time - start_time
        asyncio.create_task(
            self.service_logger_obj.async_service_failure_hook(
                service=ServiceTypes.REDIS,
                duration=_duration,
                error=e,
                call_type="async_increment_pipeline",
                start_time=start_time,
                end_time=end_time,
                parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
            )
        )
        verbose_logger.error(
            "LiteLLM Redis Caching: async increment_pipeline() - Got exception from REDIS %s",
            str(e),
        )
        # Bare raise keeps the original traceback intact.
        raise
23 changes: 19 additions & 4 deletions litellm/proxy/proxy_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,23 @@ model_list:
- model_name: gpt-4o
litellm_params:
model: openai/gpt-4o
api_key: os.environ/OPENAI_API_KEY
api_base: https://exampleopenaiendpoint-production.up.railway.app/
- model_name: fake-anthropic-endpoint
litellm_params:
model: anthropic/fake
api_base: https://exampleanthropicendpoint-production.up.railway.app/

router_settings:
provider_budget_config:
openai:
budget_limit: 0.3 # float of $ value budget for time period
time_period: 1d # can be 1d, 2d, 30d
anthropic:
budget_limit: 5
time_period: 1d
redis_host: os.environ/REDIS_HOST
redis_port: os.environ/REDIS_PORT
redis_password: os.environ/REDIS_PASSWORD

default_vertex_config:
vertex_project: "adroit-crow-413218"
vertex_location: "us-central1"
litellm_settings:
callbacks: ["prometheus"]
Loading

0 comments on commit c73ce95

Please sign in to comment.