Skip to content

Commit

Permalink
trying with multistage devel but no success
Browse files Browse the repository at this point in the history
  • Loading branch information
samos123 committed Aug 27, 2023
1 parent ffd778d commit e49aa4e
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 15 deletions.
45 changes: 31 additions & 14 deletions model-server-llama-cpp/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,32 +1,49 @@
# syntax=docker/dockerfile:1

# ---------------------------------------------------------------------------
# Build stage: compile llama-cpp-python with CUDA (cuBLAS) support inside a
# virtualenv, so the whole /venv tree can be copied into the runtime image.
# ---------------------------------------------------------------------------
ARG BASE_IMAGE=substratusai/base:latest
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS build

# Build toolchain plus OpenCL/BLAS headers needed to compile the llama.cpp
# kernels. apt caches live in build-host cache mounts (never in a layer);
# the lists cleanup is kept as a belt-and-braces layer-size safety net.
RUN --mount=type=cache,target=/var/cache/apt --mount=type=cache,target=/var/lib/apt \
    apt-get update && \
    apt-get -y --no-install-recommends install \
      build-essential \
      clinfo \
      gcc \
      git \
      libclblast-dev \
      libopenblas-dev \
      ocl-icd-opencl-dev \
      opencl-headers \
      python3 \
      python3-pip \
      python3-venv \
      wget && \
    mkdir -p /etc/OpenCL/vendors && \
    echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd && \
    rm -rf /var/lib/apt/lists/*

# Ensures that the python and pip executables used
# in the image will be those from our virtualenv.
RUN python3 -m venv /venv
ENV PATH="/venv/bin:$PATH"

# Build-related env vars: compile CUDA kernels for all supported GPU
# architectures and enable the cuBLAS backend of llama.cpp.
ENV CUDA_DOCKER_ARCH=all
ENV LLAMA_CUBLAS=1
ENV USE_MLOCK=0

# Install dependencies (build tools + the API server's Python deps) into the
# venv. NOTE(review): versions are unpinned — pin them for reproducible builds.
RUN /venv/bin/python3 -m pip install --upgrade \
      pip wheel pytest cmake scikit-build setuptools \
      fastapi uvicorn sse-starlette pydantic-settings

# Install llama-cpp-python, forcing a CMake build with cuBLAS enabled.
# `pip` resolves to /venv/bin/pip via PATH, so the wheel lands in the venv.
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python


# ---------------------------------------------------------------------------
# Runtime stage: carry over only the venv (with the compiled wheel) and the
# runtime shared libraries — no compilers or headers.
# NOTE(review): BASE_IMAGE defaults to a :latest tag — pin a version or digest
# for reproducibility. It must also ship the CUDA runtime libraries that the
# cuBLAS build links against (e.g. a nvidia/cuda:*-runtime base) — confirm.
# ---------------------------------------------------------------------------
FROM ${BASE_IMAGE}

WORKDIR /content
ENV USE_MLOCK=0
ENV MODEL_DIR="/content/saved-model"
ENV HOST=0.0.0.0
ENV PORT=8080
ENV PATH="/venv/bin:$PATH"

# The venv contains the CUDA-enabled llama-cpp-python build from the stage
# above. The previous `COPY --from=build /llama-cpp-python /llama-cpp-python`
# was removed: the build stage no longer clones that repository (it installs
# via pip into /venv), so the path does not exist and the COPY fails the build.
COPY --from=build /venv /venv

# Runtime-only shared libraries for the OpenCL/BLAS backends.
RUN --mount=type=cache,target=/var/cache/apt --mount=type=cache,target=/var/lib/apt \
    apt-get update && \
    apt-get -y --no-install-recommends install \
      clinfo \
      git \
      libclblast1 \
      libopenblas0 \
      ocl-icd-libopencl1 \
      wget && \
    mkdir -p /etc/OpenCL/vendors && \
    echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd && \
    rm -rf /var/lib/apt/lists/*


COPY scripts/ scripts/

Expand Down
21 changes: 20 additions & 1 deletion model-server-llama-cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,24 @@ The image expects a single GGML model as a single bin file under the /content/sa
### Building
Build the image:
```sh
docker build -t substratusai/model-server-llama-cpp .
docker build -t llama-cpp .
```

### Testing
Download a GGML model:
```bash
curl -L -o model.bin https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/resolve/main/llama-2-13b-chat.ggmlv3.q2_K.bin
```

Run the image with that model:
```bash
docker run --gpus=all -d -p 8080:8080 \
-v $PWD/model.bin:/content/saved-model/model.bin llama-cpp
```

Verify it's working:
```bash
curl http://localhost:8080/v1/completions \
-H "Content-Type: application/json" \
-d '{ "prompt": "Who was the first president of the United States?", "stop": ["."]}'
```

0 comments on commit e49aa4e

Please sign in to comment.