add llama.cpp model server (#34)
samos123 authored Aug 25, 2023
1 parent 95f2a91 commit ad53c22
Showing 6 changed files with 109 additions and 0 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/build-and-push.yaml
@@ -193,3 +193,34 @@ jobs:
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
  model-server-llama-cpp:
    needs: base
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: "${{ vars.DOCKERHUB_USERNAME }}"
          password: "${{ secrets.DOCKERHUB_TOKEN }}"
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: substratusai/${{ github.job }}
      - name: Build and push
        id: build-and-push
        uses: docker/build-push-action@v4
        with:
          context: ./${{ github.job }}
          platforms: "linux/amd64"
          build-args: |
            BASE_IMAGE=${{ needs.base.outputs.tag }}
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
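For local testing, the CI job above corresponds roughly to this build command (a sketch; `substratusai/base:latest` stands in for the tag the `base` job resolves via `needs.base.outputs.tag`):

```sh
# Rough local equivalent of the CI job; the BASE_IMAGE tag is an assumption
docker buildx build ./model-server-llama-cpp \
  --platform linux/amd64 \
  --build-arg BASE_IMAGE=substratusai/base:latest \
  --tag substratusai/model-server-llama-cpp:latest \
  --load
```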
21 changes: 21 additions & 0 deletions model-server-llama-cpp/.dockerignore
@@ -0,0 +1,21 @@
__pycache__
*.pyc
*.pyo
*.pyd
.Python
.env
env
pip-log.txt
pip-delete-this-directory.txt
.tox
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.log
.git
.mypy_cache
.pytest_cache
.hypothesis
4 changes: 4 additions & 0 deletions model-server-llama-cpp/.gitignore
@@ -0,0 +1,4 @@
trained/
ran/
.venv
.ipynb_checkpoints
33 changes: 33 additions & 0 deletions model-server-llama-cpp/Dockerfile
@@ -0,0 +1,33 @@
ARG BASE_IMAGE=substratusai/base:latest
FROM ${BASE_IMAGE}

ENV MODEL_DIR="/content/saved-model"
ENV HOST=0.0.0.0
ENV PORT=8080

WORKDIR /content

RUN --mount=type=cache,target=/var/cache/apt --mount=type=cache,target=/var/lib/apt \
    apt-get update && \
    apt-get -y --no-install-recommends install \
      python3 python3-pip git build-essential gcc wget \
      ocl-icd-opencl-dev opencl-headers clinfo libclblast-dev libopenblas-dev && \
    mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd && \
    rm -rf /var/lib/apt/lists/*

RUN ln -s /usr/bin/python3 /usr/bin/python

# Set build-related environment variables
ENV CUDA_DOCKER_ARCH=all
ENV LLAMA_CUBLAS=1

# Install dependencies
RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings

# Install llama-cpp-python (build with cuda)
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python

COPY scripts/ scripts/

CMD sh scripts/serve.sh
EXPOSE $PORT
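A quick way to sanity-check the resulting image is to confirm that the cuBLAS-enabled `llama-cpp-python` wheel imports cleanly; a minimal sketch (the image tag assumes the build command from the README below):

```sh
# Smoke test: override CMD and import the Llama class inside the container
docker run --rm --gpus all substratusai/model-server-llama-cpp \
  python3 -c "from llama_cpp import Llama; print('llama_cpp import ok')"
```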
13 changes: 13 additions & 0 deletions model-server-llama-cpp/README.md
@@ -0,0 +1,13 @@
# Substratus Server Llama.cpp

This image serves models in GGML format.

The image expects exactly one GGML model, stored as a single `.bin` file under the `/content/saved-model/` directory.

## Usage for testing

### Building
Build the image:
```sh
docker build -t substratusai/model-server-llama-cpp .
```
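### Running

A possible way to run the image for testing (a sketch; the host directory name and the prompt are assumptions, and any directory containing a single GGML `.bin` file works):

```sh
# Mount a directory holding one GGML .bin file at the expected path
docker run --rm --gpus all -p 8080:8080 \
  -v "$PWD/saved-model":/content/saved-model \
  substratusai/model-server-llama-cpp

# llama_cpp.server exposes an OpenAI-compatible API; once the server is up,
# send a completion request from another terminal:
curl http://localhost:8080/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Hello", "max_tokens": 16}'
```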
7 changes: 7 additions & 0 deletions model-server-llama-cpp/scripts/serve.sh
@@ -0,0 +1,7 @@
#!/usr/bin/env sh

set -xe

# Show the model directory contents in the logs for debugging
ls "${MODEL_DIR}"
# Use the first .bin file found; llama_cpp.server reads the model path
# from the MODEL environment variable
export MODEL=$(find "${MODEL_DIR}" -type f -iname "*.bin" | head -n 1)
PYTHONUNBUFFERED=1 python3 -m llama_cpp.server
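`llama_cpp.server` reads its settings from environment variables via `pydantic-settings`, which is why the script only exports `MODEL`; `HOST` and `PORT` are already set in the Dockerfile. A manual invocation outside the container might look like this (a sketch; the model filename is a placeholder):

```sh
# Hypothetical manual run; the model filename is an assumption
MODEL=/content/saved-model/model.bin HOST=0.0.0.0 PORT=8080 \
  PYTHONUNBUFFERED=1 python3 -m llama_cpp.server
```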
