Commit
fix: conditionally enable LlamaRAMCache (#83)
lsorber authored Jan 6, 2025
1 parent b19963d commit 8f4bd5f
Showing 1 changed file with 3 additions and 1 deletion.
src/raglite/_litellm.py (4 changes: 3 additions & 1 deletion)
@@ -29,6 +29,7 @@
     CreateChatCompletionStreamResponse,
     Llama,
     LlamaRAMCache,
+    llama_supports_gpu_offload,
 )

 from raglite._chatml_function_calling import chatml_function_calling_with_streaming
@@ -126,7 +127,8 @@ def llm(model: str, **kwargs: Any) -> Llama:
         **kwargs,
     )
     # Enable caching.
-    llm.set_cache(LlamaRAMCache())
+    if llama_supports_gpu_offload() or (os.cpu_count() or 1) >= 8:  # noqa: PLR2004
+        llm.set_cache(LlamaRAMCache())
     # Register the model info with LiteLLM.
     model_info = {
         repo_id_filename: {
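For context, a minimal standalone sketch of the same gating logic, assuming llama-cpp-python is installed and a local GGUF model is available; MODEL_PATH and n_ctx below are illustrative placeholders, not values from the patch:

import os

from llama_cpp import Llama, LlamaRAMCache, llama_supports_gpu_offload

# Hypothetical path for illustration; point this at any local GGUF model file.
MODEL_PATH = "model.gguf"

llm = Llama(model_path=MODEL_PATH, n_ctx=2048)

# Only enable the RAM cache when the machine is likely fast enough to benefit:
# either GPU offload is available, or the CPU has at least 8 logical cores.
if llama_supports_gpu_offload() or (os.cpu_count() or 1) >= 8:
    llm.set_cache(LlamaRAMCache())

The `or 1` guards against `os.cpu_count()` returning `None`, and the `# noqa: PLR2004` in the patch silences Ruff's magic-value warning for the hard-coded core count.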
