From 26b552c01711db590d11aff813f228f653ba13bb Mon Sep 17 00:00:00 2001 From: "Tianyi (Alex) Qiu" Date: Fri, 20 Dec 2024 19:45:40 -0800 Subject: [PATCH] fix(abstractions): double deallocation in inference with continuous backends --- src/abstractions/backends.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/abstractions/backends.py b/src/abstractions/backends.py index f94645c..e95f6f9 100644 --- a/src/abstractions/backends.py +++ b/src/abstractions/backends.py @@ -378,6 +378,7 @@ def vllm_free_gpu_memory(): try: sgl.set_default_backend(sgl.RuntimeEndpoint(f"http://localhost:{port}")) + backend_key = None connected = True backend = None print("Connected to backend.", flush=True) @@ -671,7 +672,9 @@ def sglang_free_gpu_memory(): with open(f"{root}/output/backend_history.json", "r") as f: backend_history = json.load(f) - backend_history.pop(backend_key) + if backend_key: + backend_history.pop(backend_key) + with open(f"{root}/output/backend_history.json", "w") as f: json.dump(backend_history, f)