# Patch summary (text before the first "diff --git" header is not part of any hunk):
#   * Moves Dockerfile and Dockerfile.dev into docker/ and points .dockerignore,
#     the Docker workflow and docker-compose.dev.yml at the new paths.
#   * Adds docker/Dockerfile.gpu plus a workflow that builds it for linux/amd64
#     and tags the image with a "-cuda" suffix.
#   * Makes scripts/deploy.sh and scripts/dev.sh pick the cu122, rocm5.6.1 or cpu
#     wheel index from SERGE_GPU_NVIDIA_SUPPORT / SERGE_GPU_AMD_SUPPORT, both of
#     which default to false in scripts/serge.env.
#   * Deletes .gitattributes.
diff --git a/.dockerignore b/.dockerignore
index 797a5685a9d..a52ead71aa6 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -9,7 +9,7 @@ tests
 _releaser
 _site
 CONTRIBUTING.md
-Dockerfile
+docker/
 docker-compose.yml
 docker-compose.dev.yml
 .vscode/
diff --git a/.gitattributes b/.gitattributes
deleted file mode 100644
index 1567720fdff..00000000000
--- a/.gitattributes
+++ /dev/null
@@ -1 +0,0 @@
-*.sh eol=lf
\ No newline at end of file
diff --git a/.github/workflows/docker-gpu.yml b/.github/workflows/docker-gpu.yml
new file mode 100644
index 00000000000..6c32768989c
--- /dev/null
+++ b/.github/workflows/docker-gpu.yml
@@ -0,0 +1,72 @@
+name: Docker (CUDA Support)
+
+on:
+  push:
+    branches:
+      - "main"
+    paths-ignore:
+      - "**.md"
+      - LICENSE
+      - "docker-compose.yml"
+      - "docker-compose.dev.yml"
+      - ".github/ISSUE_TEMPLATE/*.yml"
+      - ".github/dependabot.yml"
+      - ".github/release-drafter.yml"
+  pull_request:
+    branches:
+      - "*"
+    paths:
+      - "docker/Dockerfile.gpu"
+      - "scripts/deploy.sh"
+      - "scripts/dev.sh"
+  workflow_dispatch:
+  release:
+    types: [published, edited]
+
+jobs:
+  build-and-publish-image:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Docker metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ghcr.io/serge-chat/serge
+          flavor: |
+            suffix=-cuda,onlatest=true
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}
+            type=semver,pattern={{major}}.{{minor}}
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to GitHub Container Registry
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and Publish Docker Image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: docker/Dockerfile.gpu
+          push: ${{ github.event_name != 'pull_request' }}
+          target: release
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          platforms: linux/amd64
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index b8ad5648a85..28b0140faf9 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -16,8 +16,7 @@ on:
     branches:
       - "*"
     paths:
-      - "Dockerfile"
-      - "Dockerfile.dev"
+      - "docker/Dockerfile"
       - "scripts/deploy.sh"
       - "scripts/dev.sh"
   workflow_dispatch:
@@ -61,6 +60,7 @@ jobs:
         uses: docker/build-push-action@v5
         with:
           context: .
+          file: docker/Dockerfile
           push: ${{ github.event_name != 'pull_request' }}
           target: release
           cache-from: type=gha
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index d9e27728f3b..7384a7c56b1 100644
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -3,7 +3,7 @@ services:
     restart: on-failure
     build:
       context: .
-      dockerfile: Dockerfile.dev
+      dockerfile: docker/Dockerfile.dev
     volumes:
       - ./web:/usr/src/app/web/:z
       - ./api:/usr/src/app/api/:z
diff --git a/Dockerfile b/docker/Dockerfile
similarity index 100%
rename from Dockerfile
rename to docker/Dockerfile
diff --git a/Dockerfile.dev b/docker/Dockerfile.dev
similarity index 100%
rename from Dockerfile.dev
rename to docker/Dockerfile.dev
diff --git a/docker/Dockerfile.gpu b/docker/Dockerfile.gpu
new file mode 100644
index 00000000000..a94661489bb
--- /dev/null
+++ b/docker/Dockerfile.gpu
@@ -0,0 +1,52 @@
+# ---------------------------------------
+# Base image for redis
+FROM redis:7-bookworm as redis
+
+# ---------------------------------------
+# Build frontend
+FROM node:20-bookworm-slim as frontend
+
+WORKDIR /usr/src/app
+COPY ./web/package.json ./web/package-lock.json ./
+RUN npm ci
+
+COPY ./web /usr/src/app/web/
+WORKDIR /usr/src/app/web/
+RUN npm run build
+
+# ---------------------------------------
+# Runtime environment
+FROM python:3.11-slim-bookworm as release
+
+# Set ENV
+ENV NODE_ENV='production'
+ENV TZ=Etc/UTC
+WORKDIR /usr/src/app
+
+# Copy artifacts
+COPY --from=redis /usr/local/bin/redis-server /usr/local/bin/redis-server
+COPY --from=redis /usr/local/bin/redis-cli /usr/local/bin/redis-cli
+COPY --from=frontend /usr/src/app/web/build /usr/src/app/api/static/
+COPY ./api /usr/src/app/api
+COPY scripts/deploy.sh /usr/src/app/deploy.sh
+COPY scripts/serge.env /usr/src/app/serge.env
+COPY vendor/requirements.txt /usr/src/app/requirements.txt
+
+# Install api dependencies
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends dumb-init \
+    && pip install --no-cache-dir ./api \
+    && pip install --no-cache-dir -r /usr/src/app/requirements.txt \
+    && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* \
+    && chmod 755 /usr/src/app/deploy.sh \
+    && chmod 755 /usr/local/bin/redis-server \
+    && chmod 755 /usr/local/bin/redis-cli \
+    && mkdir -p /etc/redis \
+    && mkdir -p /data/db \
+    && mkdir -p /usr/src/app/weights \
+    && echo "appendonly yes" >> /etc/redis/redis.conf \
+    && echo "dir /data/db/" >> /etc/redis/redis.conf
+
+EXPOSE 8008
+ENTRYPOINT ["/usr/bin/dumb-init", "--"]
+CMD ["/bin/bash", "-c", "/usr/src/app/deploy.sh"]
diff --git a/scripts/deploy.sh b/scripts/deploy.sh
index 109a90f5f17..07814e86c22 100755
--- a/scripts/deploy.sh
+++ b/scripts/deploy.sh
@@ -19,14 +19,20 @@ detect_cpu_features() {
 		echo "basic"
 	fi
 }
-
 # Check if the CPU architecture is aarch64/arm64
 if [ "$cpu_arch" = "aarch64" ]; then
 	pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://gaby.github.io/arm64-wheels/"
 else
 	# Use @smartappli provided wheels
 	cpu_feature=$(detect_cpu_features)
-	pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cpu"
+
+	if [ "$SERGE_GPU_NVIDIA_SUPPORT" = true ]; then
+		pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cu122"
+	elif [ "$SERGE_GPU_AMD_SUPPORT" = true ]; then
+		pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/rocm5.6.1"
+	else
+		pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cpu"
+	fi
 fi
 
 echo "Recommended install command for llama-cpp-python: $pip_command"
diff --git a/scripts/dev.sh b/scripts/dev.sh
index 3ed84596a12..f6b06366da1 100755
--- a/scripts/dev.sh
+++ b/scripts/dev.sh
@@ -26,7 +26,14 @@ if [ "$cpu_arch" = "aarch64" ]; then
 else
 	# Use @smartappli provided wheels
 	cpu_feature=$(detect_cpu_features)
-	pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cpu"
+
+	if [ "$SERGE_GPU_NVIDIA_SUPPORT" = true ]; then
+		pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cu122"
+	elif [ "$SERGE_GPU_AMD_SUPPORT" = true ]; then
+		pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/rocm5.6.1"
+	else
+		pip_command="python -m pip install -v llama-cpp-python==$LLAMA_PYTHON_VERSION --only-binary=:all: --extra-index-url=https://smartappli.github.io/serge-wheels/$cpu_feature/cpu"
+	fi
 fi
 
 echo "Recommended install command for llama-cpp-python: $pip_command"
diff --git a/scripts/serge.env b/scripts/serge.env
index 24fe4c30e0c..9f7f35deefc 100644
--- a/scripts/serge.env
+++ b/scripts/serge.env
@@ -1,3 +1,5 @@
+SERGE_GPU_NVIDIA_SUPPORT=false
+SERGE_GPU_AMD_SUPPORT=false
 LLAMA_PYTHON_VERSION=0.2.50
 SERGE_ENABLE_IPV4=true
 SERGE_ENABLE_IPV6=false