From 75f969a6d3efd28fcb521100669ba2594f3ba14c Mon Sep 17 00:00:00 2001
From: turboderp <11859846+turboderp@users.noreply.github.com>
Date: Thu, 15 Feb 2024 00:07:47 +0100
Subject: [PATCH] Disable cudaMallocAsync for post2 release

---
 exllamav2/model.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/exllamav2/model.py b/exllamav2/model.py
index 780e6d5a..d13b4a8f 100644
--- a/exllamav2/model.py
+++ b/exllamav2/model.py
@@ -9,14 +9,16 @@
 # Set CUDA context to lazy loading since we won't need 95% of the modules in Torch
 os.environ["CUDA_MODULE_LOADING"] = "LAZY"
 
-# Set cudaMallocAsync allocator by default as it appears slightly more memory efficient, unless Torch is already
-# imported in which case changing the allocator would cause it to crash
-if not "PYTORCH_CUDA_ALLOC_CONF" in os.environ:
-    try:
-        x = torch.__version__
-        # TODO: Should maybe be a warning here?
-    except NameError:
-        os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"
+# Disabled for 0.0.13.post2
+#
+# # Set cudaMallocAsync allocator by default as it appears slightly more memory efficient, unless Torch is already
+# # imported in which case changing the allocator would cause it to crash
+# if not "PYTORCH_CUDA_ALLOC_CONF" in os.environ:
+#     try:
+#         x = torch.__version__
+#         # TODO: Should maybe be a warning here?
+#     except NameError:
+#         os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"
 
 import torch
 import math
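
Note: with the library no longer applying this default, users who still want the cudaMallocAsync allocator can opt in from their own entry point. Below is a minimal sketch, not part of the patch: it sets PYTORCH_CUDA_ALLOC_CONF before Torch is first imported, using a sys.modules check in place of the NameError probe the disabled code used to detect an already-imported Torch.

    import os
    import sys

    # Opt into the cudaMallocAsync allocator manually. This only works if it
    # runs before torch is first imported; once the CUDA allocator has been
    # initialized, changing PYTORCH_CUDA_ALLOC_CONF has no effect. Respect any
    # value the user has already set in the environment.
    if "torch" not in sys.modules and "PYTORCH_CUDA_ALLOC_CONF" not in os.environ:
        os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"

    import torch
    import exllamav2  # safe to import now; the env var is already in place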