From b11aac51e2ee570b6914ad7e536b15ee543544de Mon Sep 17 00:00:00 2001 From: kingbri Date: Sat, 30 Mar 2024 10:42:43 -0400 Subject: [PATCH] Model: Add torch.inference_mode() to generator function Provides a speedup to model forward. Signed-off-by: kingbri --- backends/exllamav2/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index 08e0daea..10fcb01a 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -648,6 +648,7 @@ async def generate_gen( async for value in iterate_in_threadpool(sync_generator): yield value + @torch.inference_mode() def generate_gen_sync( self, prompt: str, abort_event: Optional[threading.Event] = None, **kwargs ):