From c5ebd5f3c028cc5e49a3bbeb4aa4f570f7b1e801 Mon Sep 17 00:00:00 2001
From: yyhhyy
Date: Thu, 4 Apr 2024 12:03:25 +0800
Subject: [PATCH 1/5] Upgrade Torch version to 2.1.2

---
 setup.py | 82 +++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 52 insertions(+), 30 deletions(-)

diff --git a/setup.py b/setup.py
index 7a122d1b2..f7b94ca09 100644
--- a/setup.py
+++ b/setup.py
@@ -227,7 +227,7 @@ def _build_wheels(
     base_url: str = None,
     base_url_func: Callable[[str, str, str], str] = None,
     pkg_file_func: Callable[[str, str, str, str, OSType], str] = None,
-    supported_cuda_versions: List[str] = ["11.7", "11.8"],
+    supported_cuda_versions: List[str] = ["11.7", "11.8", "12.1"],
 ) -> Optional[str]:
     """
     Build the URL for the package wheel file based on the package name, version, and CUDA version.
@@ -272,49 +272,67 @@ def _build_wheels(
     return f"{base_url}/{full_pkg_file}"
 
 
-def torch_requires(
-    torch_version: str = "2.0.1",
-    torchvision_version: str = "0.15.2",
-    torchaudio_version: str = "2.0.2",
-):
+def torch_requires():
+    os_type, _ = get_cpu_avx_support()
+    cuda_version = get_cuda_version()
+
+    # Determine the default versions for CPU installations or if CUDA is not available
+    if os_type == OSType.DARWIN or not cuda_version:
+        torch_version = "2.1.2"
+        torchvision_version = "0.16.2"
+        torchaudio_version = "2.1.2"
+    else:
+        # Use default versions for older CUDA (< 11.8)
+        torch_version = "2.0.1"
+        torchvision_version = "0.15.2"
+        torchaudio_version = "2.0.2"
+
+        # Update versions for newer CUDA (>= 11.8)
+        if float(cuda_version) >= 11.8:
+            torch_version = "2.1.2"
+            torchvision_version = "0.16.2"
+            torchaudio_version = "2.1.2"
+
+    supported_versions = ["11.7", "11.8", "12.1"]
+    base_url_func = lambda v, x, y: f"https://download.pytorch.org/whl/{x}"
+
     torch_pkgs = [
         f"torch=={torch_version}",
         f"torchvision=={torchvision_version}",
         f"torchaudio=={torchaudio_version}",
     ]
     torch_cuda_pkgs = []
-    os_type, _ = get_cpu_avx_support()
+
     if os_type != OSType.DARWIN:
-        cuda_version = get_cuda_version()
-        if cuda_version:
-            supported_versions = ["11.7", "11.8"]
-            # torch_url = f"https://download.pytorch.org/whl/{cuda_version}/torch-{torch_version}+{cuda_version}-{py_version}-{py_version}-{os_pkg_name}.whl"
-            # torchvision_url = f"https://download.pytorch.org/whl/{cuda_version}/torchvision-{torchvision_version}+{cuda_version}-{py_version}-{py_version}-{os_pkg_name}.whl"
-            torch_url = _build_wheels(
-                "torch",
-                torch_version,
-                base_url_func=lambda v, x, y: f"https://download.pytorch.org/whl/{x}",
-                supported_cuda_versions=supported_versions,
-            )
-            torchvision_url = _build_wheels(
-                "torchvision",
-                torchvision_version,
-                base_url_func=lambda v, x, y: f"https://download.pytorch.org/whl/{x}",
-                supported_cuda_versions=supported_versions,
-            )
+        torch_url = _build_wheels(
+            "torch",
+            torch_version,
+            base_url_func=base_url_func,
+            supported_cuda_versions=supported_versions,
+        )
+        torchvision_url = _build_wheels(
+            "torchvision",
+            torchvision_version,
+            base_url_func=base_url_func,
+            supported_cuda_versions=supported_versions,
+        )
+
         if torch_url:
             torch_url_cached = cache_package(
                 torch_url, "torch", os_type == OSType.WINDOWS
             )
+            torch_cuda_pkgs.append(f"torch @ {torch_url_cached}")
+        if torchvision_url:
             torchvision_url_cached = cache_package(
                 torchvision_url, "torchvision", os_type == OSType.WINDOWS
             )
+            torch_cuda_pkgs.append(f"torchvision @ {torchvision_url_cached}")
-            torch_cuda_pkgs = [
-                f"torch @ {torch_url_cached}",
-                f"torchvision @ {torchvision_url_cached}",
-                f"torchaudio=={torchaudio_version}",
-            ]
+
+        # Add torchaudio as it does not depend on CUDA version
+        torch_cuda_pkgs.append(f"torchaudio=={torchaudio_version}")
+    else:
+        torch_cuda_pkgs = torch_pkgs
 
+    # Assuming 'setup_spec' is a dictionary where we're adding these dependencies
     setup_spec.extras["torch"] = torch_pkgs
     setup_spec.extras["torch_cpu"] = torch_pkgs
     setup_spec.extras["torch_cuda"] = torch_cuda_pkgs
@@ -588,7 +606,11 @@ def default_requires():
     setup_spec.extras["default"] += setup_spec.extras["framework"]
     setup_spec.extras["default"] += setup_spec.extras["rag"]
     setup_spec.extras["default"] += setup_spec.extras["datasource"]
-    setup_spec.extras["default"] += setup_spec.extras["torch"]
+    cuda_version = get_cuda_version()
+    if cuda_version is not None:
+        setup_spec.extras["default"] += setup_spec.extras["torch_cuda"]
+    else:
+        setup_spec.extras["default"] += setup_spec.extras["torch"]
     setup_spec.extras["default"] += setup_spec.extras["quantization"]
     setup_spec.extras["default"] += setup_spec.extras["cache"]
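For context, the CUDA-aware selection this patch introduces reduces to a small decision table. A minimal standalone sketch of the same logic, assuming (as setup.py implies) that get_cuda_version() returns a version string such as "11.8" or None; pick_torch_versions is a hypothetical helper used only for illustration:

    from typing import Optional, Tuple

    def pick_torch_versions(
        is_darwin: bool, cuda_version: Optional[str]
    ) -> Tuple[str, str, str]:
        # Returns (torch, torchvision, torchaudio) version pins.
        # macOS, CPU-only, and CUDA >= 11.8 installs all get the new pins.
        if is_darwin or not cuda_version or float(cuda_version) >= 11.8:
            return ("2.1.2", "0.16.2", "2.1.2")
        # Older CUDA toolkits (< 11.8) keep the previous pins.
        return ("2.0.1", "0.15.2", "2.0.2")

    assert pick_torch_versions(False, "11.7") == ("2.0.1", "0.15.2", "2.0.2")
    assert pick_torch_versions(False, "12.1") == ("2.1.2", "0.16.2", "2.1.2")
    assert pick_torch_versions(True, None) == ("2.1.2", "0.16.2", "2.1.2")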
f"torchaudio=={torchaudio_version}", - ] + # Add torchaudio as it does not depend on CUDA version + torch_cuda_pkgs.append(f"torchaudio=={torchaudio_version}") + else: + torch_cuda_pkgs = torch_pkgs + # Assuming 'setup_spec' is a dictionary where we're adding these dependencies setup_spec.extras["torch"] = torch_pkgs setup_spec.extras["torch_cpu"] = torch_pkgs setup_spec.extras["torch_cuda"] = torch_cuda_pkgs @@ -588,7 +606,11 @@ def default_requires(): setup_spec.extras["default"] += setup_spec.extras["framework"] setup_spec.extras["default"] += setup_spec.extras["rag"] setup_spec.extras["default"] += setup_spec.extras["datasource"] - setup_spec.extras["default"] += setup_spec.extras["torch"] + cuda_version = get_cuda_version() + if cuda_version is not None: + setup_spec.extras["default"] += setup_spec.extras["torch_cuda"] + else: + setup_spec.extras["default"] += setup_spec.extras["torch"] setup_spec.extras["default"] += setup_spec.extras["quantization"] setup_spec.extras["default"] += setup_spec.extras["cache"] From 01465f857f2f5bd4261861c638ee7f1ce8eb057f Mon Sep 17 00:00:00 2001 From: yyhhyy <95077259+yyhhyyyyyy@users.noreply.github.com> Date: Mon, 8 Apr 2024 10:09:27 +0800 Subject: [PATCH 2/5] Upgrade torch to 2.2.1 Upgrade torch to torch 2.2.1 and fix the issue where the installation of llama_cpp with a too high CUDA version fails. --- setup.py | 137 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 80 insertions(+), 57 deletions(-) diff --git a/setup.py b/setup.py index f7b94ca09..3741cf895 100644 --- a/setup.py +++ b/setup.py @@ -4,6 +4,7 @@ import re import shutil import subprocess +import sys import urllib.request from enum import Enum from typing import Callable, List, Optional, Tuple @@ -40,15 +41,22 @@ def parse_requirements(file_name: str) -> List[str]: ] +def find_python(): + python_path = sys.executable + print(python_path) + if not python_path: + print("Python command not found.") + return None + return python_path + + def get_latest_version(package_name: str, index_url: str, default_version: str): - python_command = shutil.which("python") + python_command = find_python() if not python_command: - python_command = shutil.which("python3") - if not python_command: - print("Python command not found.") - return default_version + print("Python command not found.") + return default_version - command = [ + command_index_versions = [ python_command, "-m", "pip", @@ -59,20 +67,41 @@ def get_latest_version(package_name: str, index_url: str, default_version: str): index_url, ] - result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - if result.returncode != 0: - print("Error executing command.") - print(result.stderr.decode()) - return default_version + result_index_versions = subprocess.run( + command_index_versions, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + if result_index_versions.returncode == 0: + output = result_index_versions.stdout.decode() + lines = output.split("\n") + for line in lines: + if "Available versions:" in line: + available_versions = line.split(":")[1].strip() + latest_version = available_versions.split(",")[0].strip() + # Query for compatibility with the latest version of torch + if package_name == "torch" or "torchvision": + latest_version = latest_version.split("+")[0] + return latest_version + else: + command_simulate_install = [ + python_command, + "-m", + "pip", + "install", + f"{package_name}==", + ] - output = result.stdout.decode() - lines = output.split("\n") - for line in lines: - if 
"Available versions:" in line: - available_versions = line.split(":")[1].strip() - latest_version = available_versions.split(",")[0].strip() + result_simulate_install = subprocess.run( + command_simulate_install, stderr=subprocess.PIPE + ) + print(result_simulate_install) + stderr_output = result_simulate_install.stderr.decode() + print(stderr_output) + # 从错误输出中提取版本信息 + match = re.search(r"from versions: (.+?)\)", stderr_output) + if match: + available_versions = match.group(1).split(", ") + latest_version = available_versions[-1].strip() return latest_version - return default_version @@ -227,7 +256,7 @@ def _build_wheels( base_url: str = None, base_url_func: Callable[[str, str, str], str] = None, pkg_file_func: Callable[[str, str, str, str, OSType], str] = None, - supported_cuda_versions: List[str] = ["11.7", "11.8", "12.1"], + supported_cuda_versions: List[str] = ["11.8", "12.1"], ) -> Optional[str]: """ Build the URL for the package wheel file based on the package name, version, and CUDA version. @@ -248,10 +277,11 @@ def _build_wheels( py_version = "cp" + "".join(py_version.split(".")[0:2]) if os_type == OSType.DARWIN or not cuda_version: return None - if cuda_version not in supported_cuda_versions: - print( - f"Warnning: {pkg_name} supported cuda version: {supported_cuda_versions}, replace to {supported_cuda_versions[-1]}" - ) + if cuda_version <= "11.8": + print(f"Warnning: {pkg_name} will use {supported_cuda_versions[0]}") + cuda_version = supported_cuda_versions[0] + else: + print(f"Warnning: {pkg_name} will use {supported_cuda_versions[-1]}") cuda_version = supported_cuda_versions[-1] cuda_version = "cu" + cuda_version.replace(".", "") @@ -272,38 +302,24 @@ def _build_wheels( return f"{base_url}/{full_pkg_file}" -def torch_requires(): +def torch_requires( + torch_version: str = "2.2.1", + torchvision_version: str = "0.17.1", + torchaudio_version: str = "2.2.1", +): os_type, _ = get_cpu_avx_support() - cuda_version = get_cuda_version() - - # Determine the default versions for CPU installations or if CUDA is not available - if os_type == OSType.DARWIN or not cuda_version: - torch_version = "2.1.2" - torchvision_version = "0.16.2" - torchaudio_version = "2.1.2" - else: - # Use default versions for older CUDA (< 11.8) - torch_version = "2.0.1" - torchvision_version = "0.15.2" - torchaudio_version = "2.0.2" - - # Update versions for newer CUDA (>= 11.8) - if float(cuda_version) >= 11.8: - torch_version = "2.1.2" - torchvision_version = "0.16.2" - torchaudio_version = "2.1.2" - - supported_versions = ["11.7", "11.8", "12.1"] - base_url_func = lambda v, x, y: f"https://download.pytorch.org/whl/{x}" - torch_pkgs = [ f"torch=={torch_version}", f"torchvision=={torchvision_version}", f"torchaudio=={torchaudio_version}", ] - torch_cuda_pkgs = [] + # Initialize torch_cuda_pkgs for non-Darwin OSes; + # it will be the same as torch_pkgs for Darwin or when no specific CUDA handling is needed + torch_cuda_pkgs = torch_pkgs[:] if os_type != OSType.DARWIN: + supported_versions = ["11.8", "12.1"] + base_url_func = lambda v, x, y: f"https://download.pytorch.org/whl/{x}" torch_url = _build_wheels( "torch", torch_version, @@ -316,21 +332,18 @@ def torch_requires(): base_url_func=base_url_func, supported_cuda_versions=supported_versions, ) + + # Cache and add CUDA-dependent packages if URLs are available if torch_url: torch_url_cached = cache_package( torch_url, "torch", os_type == OSType.WINDOWS ) - torch_cuda_pkgs.append(f"torch @ {torch_url_cached}") + torch_cuda_pkgs[0] = f"torch @ 
From d1bbb33ea8b4599758ba29b560604a24afb8821c Mon Sep 17 00:00:00 2001
From: yyhhyy <95077259+yyhhyyyyyy@users.noreply.github.com>
Date: Mon, 8 Apr 2024 10:16:57 +0800
Subject: [PATCH 3/5] Fix issue #1376

---
 setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/setup.py b/setup.py
index 3741cf895..75b43dfec 100644
--- a/setup.py
+++ b/setup.py
@@ -96,7 +96,6 @@ def get_latest_version(package_name: str, index_url: str, default_version: str):
     print(result_simulate_install)
     stderr_output = result_simulate_install.stderr.decode()
     print(stderr_output)
-    # Extract the version information from the error output
     match = re.search(r"from versions: (.+?)\)", stderr_output)
     if match:
         available_versions = match.group(1).split(", ")

From 44b16c4446f5cdfaf2f3480899a28720c3029eb5 Mon Sep 17 00:00:00 2001
From: yyhhyy <95077259+yyhhyyyyyy@users.noreply.github.com>
Date: Mon, 8 Apr 2024 15:59:36 +0800
Subject: [PATCH 4/5] Enhance the rigor of the code

---
 setup.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)
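The fallback rule in the diff below, shown in isolation (snap_cuda_version is a hypothetical helper; note that, like the diff, it compares version strings lexically, which works for the two-digit majors handled here but would misorder a version such as "9.2"):

    SUPPORTED_CUDA = ["11.8", "12.1"]

    def snap_cuda_version(detected: str) -> str:
        # Exactly supported versions pass through unchanged.
        if detected in SUPPORTED_CUDA:
            return detected
        # Otherwise snap to the nearest supported build: older toolkits
        # fall back to cu118, newer ones to cu121.
        return SUPPORTED_CUDA[0] if detected < "12.1" else SUPPORTED_CUDA[-1]

    assert snap_cuda_version("11.7") == "11.8"
    assert snap_cuda_version("12.2") == "12.1"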
diff --git a/setup.py b/setup.py
index 75b43dfec..a5d738658 100644
--- a/setup.py
+++ b/setup.py
@@ -276,12 +276,17 @@ def _build_wheels(
     py_version = "cp" + "".join(py_version.split(".")[0:2])
     if os_type == OSType.DARWIN or not cuda_version:
         return None
-    if cuda_version <= "11.8":
-        print(f"Warning: {pkg_name} will use {supported_cuda_versions[0]}")
-        cuda_version = supported_cuda_versions[0]
+
+    if cuda_version in supported_cuda_versions:
+        pass  # The detected CUDA version is supported; use it as-is
     else:
-        print(f"Warning: {pkg_name} will use {supported_cuda_versions[-1]}")
-        cuda_version = supported_cuda_versions[-1]
+        print(
+            f"Warning: CUDA version {cuda_version} is not in supported_cuda_versions; a supported version will be used instead."
+        )
+        if cuda_version < "12.1":
+            cuda_version = supported_cuda_versions[0]
+        else:
+            cuda_version = supported_cuda_versions[-1]
 
     cuda_version = "cu" + cuda_version.replace(".", "")
     os_pkg_name = "linux_x86_64" if os_type == OSType.LINUX else "win_amd64"
@@ -628,11 +633,7 @@ def default_requires():
     setup_spec.extras["default"] += setup_spec.extras["framework"]
     setup_spec.extras["default"] += setup_spec.extras["rag"]
     setup_spec.extras["default"] += setup_spec.extras["datasource"]
-    cuda_version = get_cuda_version()
-    if cuda_version is not None:
-        setup_spec.extras["default"] += setup_spec.extras["torch_cuda"]
-    else:
-        setup_spec.extras["default"] += setup_spec.extras["torch"]
+    setup_spec.extras["default"] += setup_spec.extras["torch"]
     setup_spec.extras["default"] += setup_spec.extras["quantization"]
     setup_spec.extras["default"] += setup_spec.extras["cache"]

From 29a1145b7b87a4947ce67b99af63c162cc7be43f Mon Sep 17 00:00:00 2001
From: Fangyin Cheng
Date: Mon, 8 Apr 2024 18:05:44 +0800
Subject: [PATCH 5/5] feat: Upgrade docker image to cuda12.1

---
 docker/base/Dockerfile     | 2 +-
 docker/base/build_image.sh | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
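With the new default, rebuilding the base image should need no extra flags; overriding it still works through the same option, e.g. (flags as listed in usage() below):

    bash docker/base/build_image.sh --base-image nvidia/cuda:12.1.0-runtime-ubuntu22.04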
diff --git a/docker/base/Dockerfile b/docker/base/Dockerfile
index b5be85c4f..4aee3044e 100644
--- a/docker/base/Dockerfile
+++ b/docker/base/Dockerfile
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE="nvidia/cuda:11.8.0-runtime-ubuntu22.04"
+ARG BASE_IMAGE="nvidia/cuda:12.1.0-runtime-ubuntu22.04"
 FROM ${BASE_IMAGE}
 
 ARG BASE_IMAGE
diff --git a/docker/base/build_image.sh b/docker/base/build_image.sh
index 32846936f..028dcc809 100755
--- a/docker/base/build_image.sh
+++ b/docker/base/build_image.sh
@@ -4,7 +4,7 @@
 SCRIPT_LOCATION=$0
 cd "$(dirname "$SCRIPT_LOCATION")"
 WORK_DIR=$(pwd)
-BASE_IMAGE_DEFAULT="nvidia/cuda:11.8.0-runtime-ubuntu22.04"
+BASE_IMAGE_DEFAULT="nvidia/cuda:12.1.0-runtime-ubuntu22.04"
 BASE_IMAGE_DEFAULT_CPU="ubuntu:22.04"
 
 BASE_IMAGE=$BASE_IMAGE_DEFAULT
@@ -21,7 +21,7 @@ BUILD_NETWORK=""
 DB_GPT_INSTALL_MODEL="default"
 
 usage () {
-    echo "USAGE: $0 [--base-image nvidia/cuda:11.8.0-runtime-ubuntu22.04] [--image-name db-gpt]"
+    echo "USAGE: $0 [--base-image nvidia/cuda:12.1.0-runtime-ubuntu22.04] [--image-name db-gpt]"
     echo "  [-b|--base-image base image name] Base image name"
     echo "  [-n|--image-name image name] Current image name, default: db-gpt"
     echo "  [-i|--pip-index-url pip index url] Pip index url, default: https://pypi.org/simple"