From aa34b2e5fd7ff7a9875fb1f1219d360f9ed0a60c Mon Sep 17 00:00:00 2001
From: kingbri <bdashore3@proton.me>
Date: Sat, 17 Feb 2024 00:21:48 -0500
Subject: [PATCH] Model: Prefer auto over manual GPU split

For safety reasons, always use auto unless a manual split is provided
and auto is forced off.

If auto is forced off and a manual split isn't provided, the manual
split path is still taken: gpu_split is left unset (None) and
gpu_split_auto is set to False.

Signed-off-by: kingbri <bdashore3@proton.me>
---
 backends/exllamav2/model.py | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py
index 9f8c1ced..1bf1d535 100644
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -108,21 +108,24 @@ def progress(loaded_modules: int, total_modules: int,
 
         # Turn off GPU split if the user is using 1 GPU
         gpu_count = torch.cuda.device_count()
-        if gpu_count > 1:
-            gpu_split = kwargs.get("gpu_split")
+        gpu_split_auto = unwrap(kwargs.get("gpu_split_auto"), True)
 
-            if gpu_split:
-                self.gpu_split = gpu_split
-            else:
-                # Auto GPU split parameters
-                self.gpu_split_auto = unwrap(kwargs.get("gpu_split_auto"), True)
-                autosplit_reserve_megabytes = unwrap(
-                    kwargs.get("autosplit_reserve"), [96]
-                )
-                self.autosplit_reserve = list(
-                    map(lambda value: value * 1024**2, autosplit_reserve_megabytes)
-                )
+        if gpu_count > 1 and gpu_split_auto:
+            # Auto GPU split parameters
+            self.gpu_split_auto = gpu_split_auto
+
+            autosplit_reserve_megabytes = unwrap(
+                kwargs.get("autosplit_reserve"), [96]
+            )
+            self.autosplit_reserve = list(
+                map(lambda value: value * 1024**2, autosplit_reserve_megabytes)
+            )
+        elif gpu_count > 1:
+            # Manual GPU split
+            self.gpu_split = kwargs.get("gpu_split")
+            self.gpu_split_auto = False
         else:
+            # One GPU setup
             self.gpu_split_auto = False
             logger.info("Disabling GPU split because one GPU is in use.")