From 2d3d82ac95c2805887801807dddae68c7a988874 Mon Sep 17 00:00:00 2001
From: Juan Calderon-Perez <835733+gaby@users.noreply.github.com>
Date: Mon, 4 Dec 2023 08:11:17 -0500
Subject: [PATCH 1/4] Implement support for CUDA

---
 .dockerignore                           |  2 +-
 .gitattributes                          |  1 -
 .github/workflows/docker-gpu.yml        | 70 +++++++++++++++++++++++++
 .github/workflows/docker.yml            |  4 +-
 docker-compose.dev.yml                  |  2 +-
 Dockerfile => docker/Dockerfile         |  0
 Dockerfile.dev => docker/Dockerfile.dev |  0
 docker/Dockerfile.gpu                   | 52 ++++++++++++++++++
 8 files changed, 126 insertions(+), 5 deletions(-)
 delete mode 100644 .gitattributes
 create mode 100644 .github/workflows/docker-gpu.yml
 rename Dockerfile => docker/Dockerfile (100%)
 rename Dockerfile.dev => docker/Dockerfile.dev (100%)
 create mode 100644 docker/Dockerfile.gpu

diff --git a/.dockerignore b/.dockerignore
index 797a5685a9d..a52ead71aa6 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -9,7 +9,7 @@ tests
 _releaser
 _site
 CONTRIBUTING.md
-Dockerfile
+docker/
 docker-compose.yml
 docker-compose.dev.yml
 .vscode/
diff --git a/.gitattributes b/.gitattributes
deleted file mode 100644
index 1567720fdff..00000000000
--- a/.gitattributes
+++ /dev/null
@@ -1 +0,0 @@
-*.sh eol=lf
\ No newline at end of file
diff --git a/.github/workflows/docker-gpu.yml b/.github/workflows/docker-gpu.yml
new file mode 100644
index 00000000000..d9ac3464884
--- /dev/null
+++ b/.github/workflows/docker-gpu.yml
@@ -0,0 +1,70 @@
+name: Docker (CUDA Support)
+
+on:
+  push:
+    branches:
+      - "main"
+    paths-ignore:
+      - "**.md"
+      - LICENSE
+      - "docker-compose.yml"
+      - "docker-compose.dev.yml"
+      - ".github/ISSUE_TEMPLATE/*.yml"
+      - ".github/dependabot.yml"
+      - ".github/release-drafter.yml"
+  pull_request:
+    branches:
+      - "*"
+    paths:
+      - "docker/Dockerfile.gpu"
+      - "scripts/deploy.sh"
+      - "scripts/dev.sh"
+  workflow_dispatch:
+  release:
+    types: [published, edited]
+
+jobs:
+  build-and-publish-image:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Docker metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ghcr.io/serge-chat/serge
+          tags: |
+            type=ref,event=branch-cuda
+            type=ref,event=pr-cuda
+            type=semver,pattern={{version}}-cuda
+            type=semver,pattern={{major}}-cuda
+            type=semver,pattern={{major}}.{{minor}}-cuda
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to GitHub Container Registry
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and Publish Docker Image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: docker/Dockerfile.gpu
+          push: ${{ github.event_name != 'pull_request' }}
+          target: release
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          platforms: linux/amd64
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index b8ad5648a85..28b0140faf9 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -16,8 +16,7 @@ on:
     branches:
       - "*"
     paths:
-      - "Dockerfile"
-      - "Dockerfile.dev"
+      - "docker/Dockerfile"
       - "scripts/deploy.sh"
      - "scripts/dev.sh"
   workflow_dispatch:
@@ -61,6 +60,7 @@
         uses: docker/build-push-action@v5
         with:
           context: .
+          file: docker/Dockerfile
           push: ${{ github.event_name != 'pull_request' }}
           target: release
           cache-from: type=gha
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index 64ebd6cfc18..d6d2da2478c 100644
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -3,7 +3,7 @@ services:
     restart: on-failure
     build:
       context: .
-      dockerfile: Dockerfile.dev
+      dockerfile: docker/Dockerfile.dev
     volumes:
       - ./web:/usr/src/app/web/
       - ./api:/usr/src/app/api/
diff --git a/Dockerfile b/docker/Dockerfile
similarity index 100%
rename from Dockerfile
rename to docker/Dockerfile
diff --git a/Dockerfile.dev b/docker/Dockerfile.dev
similarity index 100%
rename from Dockerfile.dev
rename to docker/Dockerfile.dev
diff --git a/docker/Dockerfile.gpu b/docker/Dockerfile.gpu
new file mode 100644
index 00000000000..a94661489bb
--- /dev/null
+++ b/docker/Dockerfile.gpu
@@ -0,0 +1,52 @@
+# ---------------------------------------
+# Base image for redis
+FROM redis:7-bookworm as redis
+
+# ---------------------------------------
+# Build frontend
+FROM node:20-bookworm-slim as frontend
+
+WORKDIR /usr/src/app
+COPY ./web/package.json ./web/package-lock.json ./
+RUN npm ci
+
+COPY ./web /usr/src/app/web/
+WORKDIR /usr/src/app/web/
+RUN npm run build
+
+# ---------------------------------------
+# Runtime environment
+FROM python:3.11-slim-bookworm as release
+
+# Set ENV
+ENV NODE_ENV='production'
+ENV TZ=Etc/UTC
+WORKDIR /usr/src/app
+
+# Copy artifacts
+COPY --from=redis /usr/local/bin/redis-server /usr/local/bin/redis-server
+COPY --from=redis /usr/local/bin/redis-cli /usr/local/bin/redis-cli
+COPY --from=frontend /usr/src/app/web/build /usr/src/app/api/static/
+COPY ./api /usr/src/app/api
+COPY scripts/deploy.sh /usr/src/app/deploy.sh
+COPY scripts/serge.env /usr/src/app/serge.env
+COPY vendor/requirements.txt /usr/src/app/requirements.txt
+
+# Install api dependencies
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends dumb-init \
+    && pip install --no-cache-dir ./api \
+    && pip install -r /usr/src/app/requirements.txt \
+    && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* \
+    && chmod 755 /usr/src/app/deploy.sh \
+    && chmod 755 /usr/local/bin/redis-server \
+    && chmod 755 /usr/local/bin/redis-cli \
+    && mkdir -p /etc/redis \
+    && mkdir -p /data/db \
+    && mkdir -p /usr/src/app/weights \
+    && echo "appendonly yes" >> /etc/redis/redis.conf \
+    && echo "dir /data/db/" >> /etc/redis/redis.conf
+
+EXPOSE 8008
+ENTRYPOINT ["/usr/bin/dumb-init", "--"]
+CMD ["/bin/bash", "-c", "/usr/src/app/deploy.sh"]

From 2418ef07f89d61643e90afdabb4954062dd6b94a Mon Sep 17 00:00:00 2001
From: Juan Calderon-Perez <835733+gaby@users.noreply.github.com>
Date: Mon, 4 Dec 2023 08:18:27 -0500
Subject: [PATCH 2/4] Add suffix to CI

---
 .github/workflows/docker-gpu.yml | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/docker-gpu.yml b/.github/workflows/docker-gpu.yml
index d9ac3464884..6c32768989c 100644
--- a/.github/workflows/docker-gpu.yml
+++ b/.github/workflows/docker-gpu.yml
@@ -35,12 +35,14 @@
         with:
           images: |
             ghcr.io/serge-chat/serge
+          flavor: |
+            suffix=-cuda,onlatest=true
           tags: |
-            type=ref,event=branch-cuda
-            type=ref,event=pr-cuda
-            type=semver,pattern={{version}}-cuda
-            type=semver,pattern={{major}}-cuda
-            type=semver,pattern={{major}}.{{minor}}-cuda
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}
+            type=semver,pattern={{major}}.{{minor}}
 
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v3

From 8e35f238c34f05ae7b745f25b2a97edb77e24a9d Mon Sep 17 00:00:00 2001
From: Olivier DEBAUCHE
Date: Tue, 13 Feb 2024 04:54:26 +0100
Subject: [PATCH 3/4] Add GPU support (#1056)

* Update dev.sh

* Update deploy.sh

* Update serge.env

---------

Co-authored-by: Juan Calderon-Perez <835733+gaby@users.noreply.github.com>
---
 scripts/deploy.sh | 9 +++++++--
 scripts/dev.sh    | 8 +++++++-
 scripts/serge.env | 3 ++-
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/scripts/deploy.sh b/scripts/deploy.sh
index 060d617183e..938d694c32c 100755
--- a/scripts/deploy.sh
+++ b/scripts/deploy.sh
@@ -19,14 +19,19 @@ detect_cpu_features() {
     echo "basic"
   fi
 }
-
 # Check if the CPU architecture is aarch64/arm64
 if [ "$cpu_arch" = "aarch64" ]; then
   pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://gaby.github.io/arm64-wheels/"
 else
   # Use @smartappli provided wheels
   cpu_feature=$(detect_cpu_features)
-  pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cpu"
+  if [ "$SERGE_GPU_NVIDIA_SUPPORT" = true ]; then
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cu122"
+  elif [ "$SERGE_GPU_AMD_SUPPORT" = true ]; then
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/rocm5.6.1"
+  else
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cpu"
+  fi
 fi
 
 echo "Recommended install command for llama-cpp-python: $pip_command"
diff --git a/scripts/dev.sh b/scripts/dev.sh
index dde3899047e..d1d750fca06 100755
--- a/scripts/dev.sh
+++ b/scripts/dev.sh
@@ -26,7 +26,13 @@ if [ "$cpu_arch" = "aarch64" ]; then
 else
   # Use @smartappli provided wheels
   cpu_feature=$(detect_cpu_features)
-  pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cpu"
+  if [ "$SERGE_GPU_NVIDIA_SUPPORT" = true ]; then
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cu122"
+  elif [ "$SERGE_GPU_AMD_SUPPORT" = true ]; then
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/rocm5.6.1"
+  else
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cpu"
+  fi
 fi
 
 echo "Recommended install command for llama-cpp-python: $pip_command"
diff --git a/scripts/serge.env b/scripts/serge.env
index 2c36802a7c1..d3cf5672d53 100644
--- a/scripts/serge.env
+++ b/scripts/serge.env
@@ -1,3 +1,4 @@
-
 LLAMA_PYTHON_VERSION=0.2.38
+SERGE_GPU_NVIDIA_SUPPORT=false
+SERGE_GPU_AMD_SUPPORT=false
 SERGE_ENABLE_IPV6=false

From 235d65ca12d5dd11274cbbaf3f8f7d0830aa2de2 Mon Sep 17 00:00:00 2001
From: Olivier DEBAUCHE
Date: Sun, 18 Feb 2024 16:00:49 +0100
Subject: [PATCH 4/4] Update llama-cpp-python (#1138)

* Update serge.env

* Update deploy.sh

Update path

* Update dev.sh

update path

* Update serge.env

* Update serge.env

Bump version of Llama cpp python to v0.2.44
---
 scripts/deploy.sh | 6 +++---
 scripts/dev.sh    | 6 +++---
 scripts/serge.env | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/scripts/deploy.sh b/scripts/deploy.sh
index 938d694c32c..fa4b46dde45 100755
--- a/scripts/deploy.sh
+++ b/scripts/deploy.sh
@@ -26,11 +26,11 @@ else
   # Use @smartappli provided wheels
   cpu_feature=$(detect_cpu_features)
   if [ "$SERGE_GPU_NVIDIA_SUPPORT" = true ]; then
-    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cu122"
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cu122"
   elif [ "$SERGE_GPU_AMD_SUPPORT" = true ]; then
-    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/rocm5.6.1"
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/rocm5.6.1"
   else
-    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cpu"
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cpu"
   fi
 fi
 
diff --git a/scripts/dev.sh b/scripts/dev.sh
index 0f41c345491..4f28caf5f1c 100755
--- a/scripts/dev.sh
+++ b/scripts/dev.sh
@@ -27,11 +27,11 @@ else
   # Use @smartappli provided wheels
   cpu_feature=$(detect_cpu_features)
   if [ "$SERGE_GPU_NVIDIA_SUPPORT" = true ]; then
-    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cu122"
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cu122"
   elif [ "$SERGE_GPU_AMD_SUPPORT" = true ]; then
-    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/rocm5.6.1"
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/rocm5.6.1"
   else
-    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cpu"
+    pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cpu"
   fi
 fi
 
diff --git a/scripts/serge.env b/scripts/serge.env
index cd900063821..4884fc61c0f 100644
--- a/scripts/serge.env
+++ b/scripts/serge.env
@@ -1,4 +1,4 @@
 SERGE_GPU_NVIDIA_SUPPORT=false
 SERGE_GPU_AMD_SUPPORT=false
-LLAMA_PYTHON_VERSION=0.2.39
-SERGE_ENABLE_IPV6=false
\ No newline at end of file
+LLAMA_PYTHON_VERSION=0.2.44
+SERGE_ENABLE_IPV6=false