From 2d3d82ac95c2805887801807dddae68c7a988874 Mon Sep 17 00:00:00 2001
From: Juan Calderon-Perez <835733+gaby@users.noreply.github.com>
Date: Mon, 4 Dec 2023 08:11:17 -0500
Subject: [PATCH 1/4] Implement support for CUDA

---
 .dockerignore                           |  2 +-
 .gitattributes                          |  1 -
 .github/workflows/docker-gpu.yml        | 70 +++++++++++++++++++++++++
 .github/workflows/docker.yml            |  4 +-
 docker-compose.dev.yml                  |  2 +-
 Dockerfile => docker/Dockerfile         |  0
 Dockerfile.dev => docker/Dockerfile.dev |  0
 docker/Dockerfile.gpu                   | 52 ++++++++++++++++++
 8 files changed, 126 insertions(+), 5 deletions(-)
 delete mode 100644 .gitattributes
 create mode 100644 .github/workflows/docker-gpu.yml
 rename Dockerfile => docker/Dockerfile (100%)
 rename Dockerfile.dev => docker/Dockerfile.dev (100%)
 create mode 100644 docker/Dockerfile.gpu

diff --git a/.dockerignore b/.dockerignore
index 797a5685a9d..a52ead71aa6 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -9,7 +9,7 @@ tests
 _releaser
 _site
 CONTRIBUTING.md
-Dockerfile
+docker/
 docker-compose.yml
 docker-compose.dev.yml
 .vscode/
diff --git a/.gitattributes b/.gitattributes
deleted file mode 100644
index 1567720fdff..00000000000
--- a/.gitattributes
+++ /dev/null
@@ -1 +0,0 @@
-*.sh eol=lf
\ No newline at end of file
diff --git a/.github/workflows/docker-gpu.yml b/.github/workflows/docker-gpu.yml
new file mode 100644
index 00000000000..d9ac3464884
--- /dev/null
+++ b/.github/workflows/docker-gpu.yml
@@ -0,0 +1,70 @@
+name: Docker (CUDA Support)
+
+on:
+  push:
+    branches:
+      - "main"
+    paths-ignore:
+      - "**.md"
+      - LICENSE
+      - "docker-compose.yml"
+      - "docker-compose.dev.yml"
+      - ".github/ISSUE_TEMPLATE/*.yml"
+      - ".github/dependabot.yml"
+      - ".github/release-drafter.yml"
+  pull_request:
+    branches:
+      - "*"
+    paths:
+      - "docker/Dockerfile.gpu"
+      - "scripts/deploy.sh"
+      - "scripts/dev.sh"
+  workflow_dispatch:
+  release:
+    types: [published, edited]
+
+jobs:
+  build-and-publish-image:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Docker metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ghcr.io/serge-chat/serge
+          tags: |
+            type=ref,event=branch-cuda
+            type=ref,event=pr-cuda
+            type=semver,pattern={{version}}-cuda
+            type=semver,pattern={{major}}-cuda
+            type=semver,pattern={{major}}.{{minor}}-cuda
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to GitHub Container Registry
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and Publish Docker Image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: docker/Dockerfile.gpu
+          push: ${{ github.event_name != 'pull_request' }}
+          target: release
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          platforms: linux/amd64
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index b8ad5648a85..28b0140faf9 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -16,8 +16,7 @@ on:
     branches:
       - "*"
     paths:
-      - "Dockerfile"
-      - "Dockerfile.dev"
+      - "docker/Dockerfile"
       - "scripts/deploy.sh"
      - "scripts/dev.sh"
   workflow_dispatch:
@@ -61,6 +60,7 @@
         uses: docker/build-push-action@v5
         with:
           context: .
+          file: docker/Dockerfile
           push: ${{ github.event_name != 'pull_request' }}
           target: release
           cache-from: type=gha
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index 64ebd6cfc18..d6d2da2478c 100644
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -3,7 +3,7 @@ services:
     restart: on-failure
     build:
       context: .
-      dockerfile: Dockerfile.dev
+      dockerfile: docker/Dockerfile.dev
     volumes:
       - ./web:/usr/src/app/web/
       - ./api:/usr/src/app/api/
diff --git a/Dockerfile b/docker/Dockerfile
similarity index 100%
rename from Dockerfile
rename to docker/Dockerfile
diff --git a/Dockerfile.dev b/docker/Dockerfile.dev
similarity index 100%
rename from Dockerfile.dev
rename to docker/Dockerfile.dev
diff --git a/docker/Dockerfile.gpu b/docker/Dockerfile.gpu
new file mode 100644
index 00000000000..a94661489bb
--- /dev/null
+++ b/docker/Dockerfile.gpu
@@ -0,0 +1,52 @@
+# ---------------------------------------
+# Base image for redis
+FROM redis:7-bookworm as redis
+
+# ---------------------------------------
+# Build frontend
+FROM node:20-bookworm-slim as frontend
+
+WORKDIR /usr/src/app
+COPY ./web/package.json ./web/package-lock.json ./
+RUN npm ci
+
+COPY ./web /usr/src/app/web/
+WORKDIR /usr/src/app/web/
+RUN npm run build
+
+# ---------------------------------------
+# Runtime environment
+FROM python:3.11-slim-bookworm as release
+
+# Set ENV
+ENV NODE_ENV='production'
+ENV TZ=Etc/UTC
+WORKDIR /usr/src/app
+
+# Copy artifacts
+COPY --from=redis /usr/local/bin/redis-server /usr/local/bin/redis-server
+COPY --from=redis /usr/local/bin/redis-cli /usr/local/bin/redis-cli
+COPY --from=frontend /usr/src/app/web/build /usr/src/app/api/static/
+COPY ./api /usr/src/app/api
+COPY scripts/deploy.sh /usr/src/app/deploy.sh
+COPY scripts/serge.env /usr/src/app/serge.env
+COPY vendor/requirements.txt /usr/src/app/requirements.txt
+
+# Install api dependencies
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends dumb-init \
+    && pip install --no-cache-dir ./api \
+    && pip install -r /usr/src/app/requirements.txt \
+    && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* \
+    && chmod 755 /usr/src/app/deploy.sh \
+    && chmod 755 /usr/local/bin/redis-server \
+    && chmod 755 /usr/local/bin/redis-cli \
+    && mkdir -p /etc/redis \
+    && mkdir -p /data/db \
+    && mkdir -p /usr/src/app/weights \
+    && echo "appendonly yes" >> /etc/redis/redis.conf \
+    && echo "dir /data/db/" >> /etc/redis/redis.conf
+
+EXPOSE 8008
+ENTRYPOINT ["/usr/bin/dumb-init", "--"]
+CMD ["/bin/bash", "-c", "/usr/src/app/deploy.sh"]

From 2418ef07f89d61643e90afdabb4954062dd6b94a Mon Sep 17 00:00:00 2001
From: Juan Calderon-Perez <835733+gaby@users.noreply.github.com>
Date: Mon, 4 Dec 2023 08:18:27 -0500
Subject: [PATCH 2/4] Add suffix to CI

---
 .github/workflows/docker-gpu.yml | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/docker-gpu.yml b/.github/workflows/docker-gpu.yml
index d9ac3464884..6c32768989c 100644
--- a/.github/workflows/docker-gpu.yml
+++ b/.github/workflows/docker-gpu.yml
@@ -35,12 +35,14 @@
         with:
           images: |
             ghcr.io/serge-chat/serge
+          flavor: |
+            suffix=-cuda,onlatest=true
           tags: |
-            type=ref,event=branch-cuda
-            type=ref,event=pr-cuda
-            type=semver,pattern={{version}}-cuda
-            type=semver,pattern={{major}}-cuda
-            type=semver,pattern={{major}}.{{minor}}-cuda
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}
+            type=semver,pattern={{major}}.{{minor}}
 
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v3

From 8e35f238c34f05ae7b745f25b2a97edb77e24a9d Mon Sep 17 00:00:00 2001
From: Olivier DEBAUCHE
Date: Tue, 13 Feb 2024 04:54:26 +0100
Subject: [PATCH 3/4] Add GPU support (#1056)

* Update dev.sh

* Update deploy.sh

* Update serge.env

---------

Co-authored-by: Juan Calderon-Perez <835733+gaby@users.noreply.github.com>
---
 scripts/deploy.sh | 9 +++++++--
 scripts/dev.sh    | 8 +++++++-
 scripts/serge.env | 3 ++-
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/scripts/deploy.sh b/scripts/deploy.sh
index 060d617183e..938d694c32c 100755
--- a/scripts/deploy.sh
+++ b/scripts/deploy.sh
@@ -19,14 +19,19 @@ detect_cpu_features() {
     echo "basic"
   fi
 }
-
 # Check if the CPU architecture is aarch64/arm64
 if [ "$cpu_arch" = "aarch64" ]; then
   pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://gaby.github.io/arm64-wheels/"
 else
   # Use @smartappli provided wheels
   cpu_feature=$(detect_cpu_features)
-  pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cpu"
+  if [ "$SERGE_GPU_NVIDIA_SUPPORT" = true ]; then
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cu122"
+  elif [ "$SERGE_GPU_AMD_SUPPORT" = true ]; then
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/rocm5.6.1"
+  else
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cpu"
+  fi
 fi
 
 echo "Recommended install command for llama-cpp-python: $pip_command"
diff --git a/scripts/dev.sh b/scripts/dev.sh
index dde3899047e..d1d750fca06 100755
--- a/scripts/dev.sh
+++ b/scripts/dev.sh
@@ -26,7 +26,13 @@ if [ "$cpu_arch" = "aarch64" ]; then
 else
   # Use @smartappli provided wheels
   cpu_feature=$(detect_cpu_features)
-  pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cpu"
+  if [ "$SERGE_GPU_NVIDIA_SUPPORT" = true ]; then
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cu122"
+  elif [ "$SERGE_GPU_AMD_SUPPORT" = true ]; then
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/rocm5.6.1"
+  else
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cpu"
+  fi
 fi
 
 echo "Recommended install command for llama-cpp-python: $pip_command"
diff --git a/scripts/serge.env b/scripts/serge.env
index 2c36802a7c1..d3cf5672d53 100644
--- a/scripts/serge.env
+++ b/scripts/serge.env
@@ -1,3 +1,4 @@
-
 LLAMA_PYTHON_VERSION=0.2.38
+SERGE_GPU_NVIDIA_SUPPORT=false
+SERGE_GPU_AMD_SUPPORT=false
 SERGE_ENABLE_IPV6=false

From 235d65ca12d5dd11274cbbaf3f8f7d0830aa2de2 Mon Sep 17 00:00:00 2001
From: Olivier DEBAUCHE
Date: Sun, 18 Feb 2024 16:00:49 +0100
Subject: [PATCH 4/4] Update llama-cpp-python (#1138)

* Update serge.env

* Update deploy.sh

Update path

* Update dev.sh

update path

* Update serge.env

* Update serge.env

Bump version of Llama cpp python to v0.2.44
---
 scripts/deploy.sh | 6 +++---
 scripts/dev.sh    | 6 +++---
 scripts/serge.env | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/scripts/deploy.sh b/scripts/deploy.sh
index 938d694c32c..fa4b46dde45 100755
--- a/scripts/deploy.sh
+++ b/scripts/deploy.sh
@@ -26,11 +26,11 @@ else
   # Use @smartappli provided wheels
   cpu_feature=$(detect_cpu_features)
   if [ "$SERGE_GPU_NVIDIA_SUPPORT" = true ]; then
-    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cu122"
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cu122"
   elif [ "$SERGE_GPU_AMD_SUPPORT" = true ]; then
-    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/rocm5.6.1"
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/rocm5.6.1"
   else
-    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cpu"
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cpu"
   fi
 fi
 
diff --git a/scripts/dev.sh b/scripts/dev.sh
index 0f41c345491..4f28caf5f1c 100755
--- a/scripts/dev.sh
+++ b/scripts/dev.sh
@@ -27,11 +27,11 @@ else
   # Use @smartappli provided wheels
   cpu_feature=$(detect_cpu_features)
   if [ "$SERGE_GPU_NVIDIA_SUPPORT" = true ]; then
-    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cu122"
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cu122"
   elif [ "$SERGE_GPU_AMD_SUPPORT" = true ]; then
-    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/rocm5.6.1"
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/rocm5.6.1"
   else
-    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cpu"
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cpu"
   fi
 fi
 
diff --git a/scripts/serge.env b/scripts/serge.env
index cd900063821..4884fc61c0f 100644
--- a/scripts/serge.env
+++ b/scripts/serge.env
@@ -1,4 +1,4 @@
 SERGE_GPU_NVIDIA_SUPPORT=false
 SERGE_GPU_AMD_SUPPORT=false
-LLAMA_PYTHON_VERSION=0.2.39
-SERGE_ENABLE_IPV6=false
\ No newline at end of file
+LLAMA_PYTHON_VERSION=0.2.44
+SERGE_ENABLE_IPV6=false