BerriAI · krrishdholakia · Mar 24, 2025 · Mar 19, 2025
diff --git a/.circleci/requirements.txt b/.circleci/requirements.txt
@@ -4,7 +4,8 @@ python-dotenv
 tiktoken
 importlib_metadata
 cohere
-redis
+redis==5.2.1
+redisvl==0.4.1
 anthropic
 orjson==3.9.15
 pydantic==2.7.1

diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+.python-version
 .venv
 .env
 .newenv

diff --git a/Dockerfile b/Dockerfile
@@ -37,9 +37,6 @@ RUN pip install dist/*.whl
 # install dependencies as wheels
 RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt
 
-# install semantic-cache [Experimental]- we need this here and not in requirements.txt because redisvl pins to pydantic 1.0 
-RUN pip install redisvl==0.0.7 --no-deps
-
 # ensure pyjwt is used, not jwt
 RUN pip uninstall jwt -y
 RUN pip uninstall PyJWT -y

diff --git a/docker/Dockerfile.database b/docker/Dockerfile.database
@@ -59,9 +59,6 @@ COPY --from=builder /wheels/ /wheels/
 # Install the built wheel using pip; again using a wildcard if it's the only file
 RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
 
-# install semantic-cache [Experimental]- we need this here and not in requirements.txt because redisvl pins to pydantic 1.0 
-RUN pip install redisvl==0.0.7 --no-deps
-
 # ensure pyjwt is used, not jwt
 RUN pip uninstall jwt -y
 RUN pip uninstall PyJWT -y

diff --git a/docker/Dockerfile.non_root b/docker/Dockerfile.non_root
@@ -14,7 +14,7 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 
 # Install build dependencies
 RUN apt-get clean && apt-get update && \
-    apt-get install -y gcc python3-dev && \
+    apt-get install -y gcc g++ python3-dev && \
     rm -rf /var/lib/apt/lists/*
 
 RUN pip install --no-cache-dir --upgrade pip && \
@@ -56,10 +56,8 @@ COPY --from=builder /wheels/ /wheels/
 # Install the built wheel using pip; again using a wildcard if it's the only file
 RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
 
-# install semantic-cache [Experimental]- we need this here and not in requirements.txt because redisvl pins to pydantic 1.0
 # ensure pyjwt is used, not jwt
-RUN pip install redisvl==0.0.7 --no-deps --no-cache-dir && \
-    pip uninstall jwt -y && \
+RUN pip uninstall jwt -y && \
     pip uninstall PyJWT -y && \
     pip install PyJWT==2.9.0 --no-cache-dir
 

diff --git a/docs/my-website/docs/caching/all_caches.md b/docs/my-website/docs/caching/all_caches.md
@@ -26,7 +26,7 @@ Install redis
 pip install redis
 ```
 
-For the hosted version you can setup your own Redis DB here: https://app.redislabs.com/
+For the hosted version, you can set up your own Redis DB here: https://redis.io/try-free/
 
 ```python
 import litellm
@@ -37,11 +37,11 @@ litellm.cache = Cache(type="redis", host=<host>, port=<port>, password=<password
 
 # Make completion calls
 response1 = completion(
-    model="gpt-3.5-turbo", 
+    model="gpt-3.5-turbo",
     messages=[{"role": "user", "content": "Tell me a joke."}]
 )
 response2 = completion(
-    model="gpt-3.5-turbo", 
+    model="gpt-3.5-turbo",
     messages=[{"role": "user", "content": "Tell me a joke."}]
 )
 
@@ -91,12 +91,12 @@ response2 = completion(
 
 <TabItem value="redis-sem" label="redis-semantic cache">
 
-Install redis
+Install redisvl client
 ```shell
-pip install redisvl==0.0.7
+pip install redisvl==0.4.1
 ```
 
-For the hosted version you can setup your own Redis DB here: https://app.redislabs.com/
+For the hosted version, you can set up your own Redis DB here: https://redis.io/try-free/
 
 ```python
 import litellm
@@ -114,6 +114,7 @@ litellm.cache = Cache(
     port=os.environ["REDIS_PORT"],
     password=os.environ["REDIS_PASSWORD"],
     similarity_threshold=0.8, # similarity threshold for cache hits, 0 == no similarity, 1 = exact matches, 0.5 == 50% similarity
+    ttl=120,
     redis_semantic_cache_embedding_model="text-embedding-ada-002", # this model is passed to litellm.embedding(), any litellm.embedding() model is supported here
 )
 response1 = completion(
@@ -471,11 +472,13 @@ def __init__(
     password: Optional[str] = None,
     namespace: Optional[str] = None,
     default_in_redis_ttl: Optional[float] = None,
-    similarity_threshold: Optional[float] = None,
-    redis_semantic_cache_use_async=False,
-    redis_semantic_cache_embedding_model="text-embedding-ada-002",
     redis_flush_size=None,
 
+    # redis semantic cache params
+    similarity_threshold: Optional[float] = None,
+    redis_semantic_cache_embedding_model: str = "text-embedding-ada-002",
+    redis_semantic_cache_index_name: Optional[str] = None,
+
     # s3 Bucket, boto3 configuration
     s3_bucket_name: Optional[str] = None,
     s3_region_name: Optional[str] = None,

diff --git a/litellm/caching/caching.py b/litellm/caching/caching.py
@@ -88,16 +88,16 @@ def __init__(
         s3_aws_session_token: Optional[str] = None,
         s3_config: Optional[Any] = None,
         s3_path: Optional[str] = None,
-        redis_semantic_cache_use_async=False,
-        redis_semantic_cache_embedding_model="text-embedding-ada-002",
+        redis_semantic_cache_embedding_model: str = "text-embedding-ada-002",
+        redis_semantic_cache_index_name: Optional[str] = None,
         redis_flush_size: Optional[int] = None,
         redis_startup_nodes: Optional[List] = None,
-        disk_cache_dir=None,
+        disk_cache_dir: Optional[str] = None,
         qdrant_api_base: Optional[str] = None,
         qdrant_api_key: Optional[str] = None,
         qdrant_collection_name: Optional[str] = None,
         qdrant_quantization_config: Optional[str] = None,
-        qdrant_semantic_cache_embedding_model="text-embedding-ada-002",
+        qdrant_semantic_cache_embedding_model: str = "text-embedding-ada-002",
         **kwargs,
     ):
         """
@@ -170,8 +170,8 @@ def __init__(
                 port=port,
                 password=password,
                 similarity_threshold=similarity_threshold,
-                use_async=redis_semantic_cache_use_async,
                 embedding_model=redis_semantic_cache_embedding_model,
+                index_name=redis_semantic_cache_index_name,
                 **kwargs,
             )
         elif type == LiteLLMCacheType.QDRANT_SEMANTIC:
-Original file line number
+Diff line change
@@ -1,3 +1,4 @@
+    .python-version
     .venv
     .env
     .newenv
@@ Expand Down @@