add llama.cpp model server (#34)
samos123 authored Aug 25, 2023
1 parent 95f2a91 commit ad53c22
Showing 6 changed files with 109 additions and 0 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/build-and-push.yaml
@@ -193,3 +193,34 @@ jobs:
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
  model-server-llama-cpp:
    needs: base
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: "${{ vars.DOCKERHUB_USERNAME }}"
          password: "${{ secrets.DOCKERHUB_TOKEN }}"
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: substratusai/${{ github.job }}
      - name: Build and push
        id: build-and-push
        uses: docker/build-push-action@v4
        with:
          context: ./${{ github.job }}
          platforms: "linux/amd64"
          build-args: |
            BASE_IMAGE=${{ needs.base.outputs.tag }}
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
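For local testing, the CI job above corresponds roughly to this build command (a sketch; `substratusai/base:latest` stands in for the tag the `base` job resolves via `needs.base.outputs.tag`):

```sh
# Rough local equivalent of the CI job; the BASE_IMAGE tag is an assumption
docker buildx build ./model-server-llama-cpp \
  --platform linux/amd64 \
  --build-arg BASE_IMAGE=substratusai/base:latest \
  --tag substratusai/model-server-llama-cpp:latest \
  --load
```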
21 changes: 21 additions & 0 deletions model-server-llama-cpp/.dockerignore
@@ -0,0 +1,21 @@
__pycache__
*.pyc
*.pyo
*.pyd
.Python
.env
env
pip-log.txt
pip-delete-this-directory.txt
.tox
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.log
.git
.mypy_cache
.pytest_cache
.hypothesis
4 changes: 4 additions & 0 deletions model-server-llama-cpp/.gitignore
@@ -0,0 +1,4 @@
trained/
ran/
.venv
.ipynb_checkpoints
33 changes: 33 additions & 0 deletions model-server-llama-cpp/Dockerfile
@@ -0,0 +1,33 @@
ARG BASE_IMAGE=substratusai/base:latest
FROM ${BASE_IMAGE}

ENV MODEL_DIR="/content/saved-model"
ENV HOST=0.0.0.0
ENV PORT=8080

WORKDIR /content

RUN --mount=type=cache,target=/var/cache/apt --mount=type=cache,target=/var/lib/apt \
    apt-get update && \
    apt-get -y --no-install-recommends install \
      python3 python3-pip git build-essential gcc wget \
      ocl-icd-opencl-dev opencl-headers clinfo libclblast-dev libopenblas-dev && \
    mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd && \
    rm -rf /var/lib/apt/lists/*

RUN ln -s /usr/bin/python3 /usr/bin/python

# Set build-related environment variables
ENV CUDA_DOCKER_ARCH=all
ENV LLAMA_CUBLAS=1

# Install dependencies
RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings

# Install llama-cpp-python (build with cuda)
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python

COPY scripts/ scripts/

CMD sh scripts/serve.sh
EXPOSE $PORT
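A quick way to sanity-check the resulting image is to confirm that the cuBLAS-enabled `llama-cpp-python` wheel imports cleanly; a minimal sketch (the image tag assumes the build command from the README below):

```sh
# Smoke test: override CMD and import the Llama class inside the container
docker run --rm --gpus all substratusai/model-server-llama-cpp \
  python3 -c "from llama_cpp import Llama; print('llama_cpp import ok')"
```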
13 changes: 13 additions & 0 deletions model-server-llama-cpp/README.md
@@ -0,0 +1,13 @@
# Substratus Server Llama.cpp

This image serves models in GGML format.

The image expects exactly one GGML model, stored as a single `.bin` file under the `/content/saved-model/` directory.

## Usage for testing

### Building
Build the image:
```sh
docker build -t substratusai/model-server-llama-cpp .
```
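### Running

A possible way to run the image for testing (a sketch; the host directory name and the prompt are assumptions, and any directory containing a single GGML `.bin` file works):

```sh
# Mount a directory holding one GGML .bin file at the expected path
docker run --rm --gpus all -p 8080:8080 \
  -v "$PWD/saved-model":/content/saved-model \
  substratusai/model-server-llama-cpp

# llama_cpp.server exposes an OpenAI-compatible API; once the server is up,
# send a completion request from another terminal:
curl http://localhost:8080/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Hello", "max_tokens": 16}'
```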
7 changes: 7 additions & 0 deletions model-server-llama-cpp/scripts/serve.sh
@@ -0,0 +1,7 @@
#!/usr/bin/env sh

set -xe

# Show the model directory contents in the logs for debugging
ls "${MODEL_DIR}"
# Use the first .bin file found; llama_cpp.server reads the model path
# from the MODEL environment variable
export MODEL=$(find "${MODEL_DIR}" -type f -iname "*.bin" | head -n 1)
PYTHONUNBUFFERED=1 python3 -m llama_cpp.server
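`llama_cpp.server` reads its settings from environment variables via `pydantic-settings`, which is why the script only exports `MODEL`; `HOST` and `PORT` are already set in the Dockerfile. A manual invocation outside the container might look like this (a sketch; the model filename is a placeholder):

```sh
# Hypothetical manual run; the model filename is an assumption
MODEL=/content/saved-model/model.bin HOST=0.0.0.0 PORT=8080 \
  PYTHONUNBUFFERED=1 python3 -m llama_cpp.server
```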
