From 75f969a6d3efd28fcb521100669ba2594f3ba14c Mon Sep 17 00:00:00 2001
From: turboderp <11859846+turboderp@users.noreply.github.com>
Date: Thu, 15 Feb 2024 00:07:47 +0100
Subject: [PATCH] Disable cudaMallocAsync for post2 release

---
 exllamav2/model.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/exllamav2/model.py b/exllamav2/model.py
index 780e6d5a..d13b4a8f 100644
--- a/exllamav2/model.py
+++ b/exllamav2/model.py
@@ -9,14 +9,16 @@
 # Set CUDA context to lazy loading since we won't need 95% of the modules in Torch
 os.environ["CUDA_MODULE_LOADING"] = "LAZY"
 
-# Set cudaMallocAsync allocator by default as it appears slightly more memory efficient, unless Torch is already
-# imported in which case changing the allocator would cause it to crash
-if not "PYTORCH_CUDA_ALLOC_CONF" in os.environ:
-    try:
-        x = torch.__version__
-        # TODO: Should maybe be a warning here?
-    except NameError:
-        os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"
+# Disabled for 0.0.13.post2
+#
+# # Set cudaMallocAsync allocator by default as it appears slightly more memory efficient, unless Torch is already
+# # imported in which case changing the allocator would cause it to crash
+# if not "PYTORCH_CUDA_ALLOC_CONF" in os.environ:
+#     try:
+#         x = torch.__version__
+#         # TODO: Should maybe be a warning here?
+#     except NameError:
+#         os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"
 
 import torch
 import math
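
Note: with the library no longer applying this default, users who still want the cudaMallocAsync allocator can opt in from their own entry point. Below is a minimal sketch, not part of the patch: it sets PYTORCH_CUDA_ALLOC_CONF before Torch is first imported, using a sys.modules check in place of the NameError probe the disabled code used to detect an already-imported Torch.

    import os
    import sys

    # Opt into the cudaMallocAsync allocator manually. This only works if it
    # runs before torch is first imported; once the CUDA allocator has been
    # initialized, changing PYTORCH_CUDA_ALLOC_CONF has no effect. Respect any
    # value the user has already set in the environment.
    if "torch" not in sys.modules and "PYTORCH_CUDA_ALLOC_CONF" not in os.environ:
        os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"

    import torch
    import exllamav2  # safe to import now; the env var is already in place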