From 2027b16fda52281ef24af1fa275ca434c59f65a5 Mon Sep 17 00:00:00 2001 From: soham <68921363+sohzm@users.noreply.github.com> Date: Tue, 27 Aug 2024 21:26:09 +0530 Subject: [PATCH] feat: add vulkan backend support (#291) * Fix includes and init vulkan the same as llama.cpp * Add Windows Vulkan CI * Updated ggml submodule * support epsilon as a parameter for ggml_group_norm --------- Co-authored-by: Cloudwalk Co-authored-by: Oleg Skutte <00.00.oleg.00.00@gmail.com> Co-authored-by: leejet --- .github/workflows/build.yml | 13 +++++++++++++ CMakeLists.txt | 7 +++++++ ggml | 2 +- ggml_extend.hpp | 6 +++++- model.cpp | 4 ++++ stable-diffusion.cpp | 11 ++++++++++- upscaler.cpp | 4 ++++ 7 files changed, 44 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ada75f3a..fe141089 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -148,6 +148,9 @@ jobs: windows-latest-cmake: runs-on: windows-2019 + env: + VULKAN_VERSION: 1.3.261.1 + strategy: matrix: include: @@ -163,6 +166,8 @@ jobs: defines: "-DSD_CUBLAS=ON -DSD_BUILD_SHARED_LIBS=ON" - build: "rocm5.5" defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON' + - build: 'vulkan' + defines: "-DSD_VULKAN=ON -DSD_BUILD_SHARED_LIBS=ON" steps: - name: Clone id: checkout @@ -192,6 +197,14 @@ jobs: uses: urkle/action-get-ninja@v1 with: version: 1.11.1 + - name: Install Vulkan SDK + id: get_vulkan + if: ${{ matrix.build == 'vulkan' }} + run: | + curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe" + & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install + Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" + Add-Content $env:GITHUB_PATH 
"C:\VulkanSDK\${env:VULKAN_VERSION}\bin" - name: Build id: cmake_build diff --git a/CMakeLists.txt b/CMakeLists.txt index 71bcd3e5..a9377449 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE}) option(SD_CUBLAS "sd: cuda backend" OFF) option(SD_HIPBLAS "sd: rocm backend" OFF) option(SD_METAL "sd: metal backend" OFF) +option(SD_VULKAN "sd: vulkan backend" OFF) option(SD_SYCL "sd: sycl backend" OFF) option(SD_FLASH_ATTN "sd: use flash attention for x4 less memory usage" OFF) option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF) @@ -45,6 +46,12 @@ if(SD_METAL) add_definitions(-DSD_USE_METAL) endif() +if (SD_VULKAN) + message("Use Vulkan as backend stable-diffusion") + set(GGML_VULKAN ON) + add_definitions(-DSD_USE_VULKAN) +endif () + if (SD_HIPBLAS) message("Use HIPBLAS as backend stable-diffusion") set(GGML_HIPBLAS ON) diff --git a/ggml b/ggml index a06c6834..21f9e5c4 160000 --- a/ggml +++ b/ggml @@ -1 +1 @@ -Subproject commit a06c68343e9976fdfc80917a958b903a0d7c8cc6 +Subproject commit 21f9e5c426b105841c2e346d8f1aafec398edf15 diff --git a/ggml_extend.hpp b/ggml_extend.hpp index 09e4fcb2..617f8f65 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -32,6 +32,10 @@ #include "ggml-metal.h" #endif +#ifdef SD_USE_VULKAN +#include "ggml-vulkan.h" +#endif + #ifdef SD_USE_SYCL #include "ggml-sycl.h" #endif @@ -655,7 +659,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx struct ggml_tensor* k, struct ggml_tensor* v, bool mask = false) { -#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) && !defined(SD_USE_SYCL) +#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) && !defined(SD_USE_SYCL) struct ggml_tensor* kqv = ggml_flash_attn(ctx, q, k, v, false); // [N * n_head, n_token, d_head] 
#else float d_head = (float)q->ne[0]; diff --git a/model.cpp b/model.cpp index 7390c161..b74a735f 100644 --- a/model.cpp +++ b/model.cpp @@ -21,6 +21,10 @@ #include "ggml-metal.h" #endif +#ifdef SD_USE_VULKAN +#include "ggml-vulkan.h" +#endif + #define ST_HEADER_SIZE_LEN 8 uint64_t read_u64(uint8_t* buffer) { diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 1bbe0d94..c73f1e43 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -160,6 +160,15 @@ class StableDiffusionGGML { ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr); backend = ggml_backend_metal_init(); #endif +#ifdef SD_USE_VULKAN + LOG_DEBUG("Using Vulkan backend"); + for (int device = 0; !backend && device < ggml_backend_vk_get_device_count(); ++device) { // stop at the first device that initializes, so no earlier handle is overwritten + backend = ggml_backend_vk_init(device); + } + if (!backend) { + LOG_WARN("Failed to initialize Vulkan backend"); + } +#endif #ifdef SD_USE_SYCL LOG_DEBUG("Using SYCL backend"); backend = ggml_backend_sycl_init(0); @@ -170,7 +179,7 @@ class StableDiffusionGGML { backend = ggml_backend_cpu_init(); } #ifdef SD_USE_FLASH_ATTENTION -#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined(SD_USE_SYCL) +#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined(SD_USE_SYCL) || defined(SD_USE_VULKAN) LOG_WARN("Flash Attention not supported with GPU Backend"); #else LOG_INFO("Flash Attention enabled"); diff --git a/upscaler.cpp b/upscaler.cpp index 2890ad34..09635299 100644 --- a/upscaler.cpp +++ b/upscaler.cpp @@ -24,6 +24,10 @@ struct UpscalerGGML { ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr); backend = ggml_backend_metal_init(); #endif +#ifdef SD_USE_VULKAN + LOG_DEBUG("Using Vulkan backend"); + backend = ggml_backend_vk_init(0); +#endif #ifdef SD_USE_SYCL LOG_DEBUG("Using SYCL backend"); backend = ggml_backend_sycl_init(0);