Skip to content

Commit

Permalink
llama-cpp add support for cpu
Browse files Browse the repository at this point in the history
  • Loading branch information
samos123 committed Aug 29, 2023
1 parent d72d4fa commit 315a4f7
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 6 deletions.
30 changes: 26 additions & 4 deletions .github/workflows/build-and-push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -209,18 +209,40 @@ jobs:
username: "${{ vars.DOCKERHUB_USERNAME }}"
password: "${{ secrets.DOCKERHUB_TOKEN }}"
- name: Docker meta
id: meta
id: meta-cpu
uses: docker/metadata-action@v4
with:
images: substratusai/${{ github.job }}
flavor: |
latest=true
suffix=cpu
- name: Build and push
id: build-and-push
uses: docker/build-push-action@v4
with:
context: ./${{ github.job }}
platforms: "linux/amd64"
build-args: |
BASE_IMAGE=${{ needs.base.outputs.tag }}
COMPUTE_TYPE=cpu
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ steps.meta-cpu.outputs.tags }}
labels: ${{ steps.meta-cpu.outputs.labels }}
- name: Docker meta
id: meta-gpu
uses: docker/metadata-action@v4
with:
images: substratusai/${{ github.job }}
flavor: |
latest=false
suffix=gpu
- name: Build and push
uses: docker/build-push-action@v4
with:
context: ./${{ github.job }}
platforms: "linux/amd64"
build-args: |
BASE_IMAGE=${{ needs.base.outputs.tag }}
COMPUTE_TYPE=gpu
push: true
tags: ${{ steps.meta-gpu.outputs.tags }}
labels: ${{ steps.meta-gpu.outputs.labels }}
8 changes: 6 additions & 2 deletions model-server-llama-cpp/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
ARG BASE_IMAGE=substratusai/base:latest
ARG COMPUTE_TYPE=gpu
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 as build
ARG COMPUTE_TYPE

RUN --mount=type=cache,target=/var/cache/apt --mount=type=cache,target=/var/lib/apt \
apt-get update && \
Expand All @@ -20,8 +22,10 @@ ENV LLAMA_CUBLAS=1
# Install dependencies
RUN /venv/bin/python3 -m pip install --upgrade pip wheel pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings

RUN git clone --recurse-submodules https://github.com/abetlen/llama-cpp-python.git && \
cd llama-cpp-python && make build.cuda
RUN git clone --recurse-submodules https://github.com/abetlen/llama-cpp-python.git
WORKDIR /llama-cpp-python
RUN if [ "$COMPUTE_TYPE" = "gpu" ]; then make build.cuda; fi
RUN if [ "$COMPUTE_TYPE" = "cpu" ]; then make build.openblas; fi

FROM ${BASE_IMAGE}

Expand Down

0 comments on commit 315a4f7

Please sign in to comment.