Skip to content

Commit

Permalink
Port exllamav2 (#60)
Browse files Browse the repository at this point in the history
  • Loading branch information
flozi00 authored Nov 27, 2023
1 parent f9409af commit 8347f58
Show file tree
Hide file tree
Showing 38 changed files with 3,019 additions and 1,540 deletions.
9 changes: 9 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
    "build": {
        "dockerfile": "../Dockerfile",
        "context": ".."
    },
    "runArgs": [
        "--gpus",
        "all"
    ]
}
51 changes: 25 additions & 26 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -49,19 +49,19 @@ ARG TARGETPLATFORM
# Prepend the conda bin directory so executables under /opt/conda/bin are found first.
ENV PATH=/opt/conda/bin:$PATH

# Install the base build tooling (compiler toolchain, ccache, curl, git, sudo).
# The apt package index is removed in the same layer so it does not end up in the image.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        ccache \
        curl \
        git \
        sudo \
    && rm -rf /var/lib/apt/lists/*

# Install conda
# Translate Docker's TARGETPLATFORM into the architecture suffix used by the
# Mambaforge release assets (aarch64 for linux/arm64, x86_64 otherwise), then
# download the matching installer to ~/mambaforge.sh.
# MAMBA_VERSION is expected to be declared earlier in the file.
# Only -o is passed to curl: there is a single URL, so an additional -O would
# have no URL left to apply to.
RUN case "${TARGETPLATFORM}" in \
        "linux/arm64") MAMBA_ARCH=aarch64 ;; \
        *)             MAMBA_ARCH=x86_64 ;; \
    esac && \
    curl -fsSL -v -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
RUN chmod +x ~/mambaforge.sh && \
Expand All @@ -71,19 +71,19 @@ RUN chmod +x ~/mambaforge.sh && \
# Install pytorch
# On arm64 the build is aborted with an error code (and a message saying why).
# On every other platform: update conda, pin Python 3.9, then install the
# CUDA 11.8 builds of torch/torchvision/torchaudio from the PyTorch wheel index.
# --no-cache-dir keeps pip's download cache out of the layer; the conda caches
# are cleaned in the same layer for the same reason.
RUN case "${TARGETPLATFORM}" in \
        "linux/arm64") echo "linux/arm64 is not supported by this image" >&2; exit 1 ;; \
        *) /opt/conda/bin/conda update -y conda && \
           /opt/conda/bin/conda install -y "python=3.9" && \
           /opt/conda/bin/pip install --no-cache-dir torch==2.0.0+cu118 torchvision==0.15.1+cu118 torchaudio==2.0.1 --index-url https://download.pytorch.org/whl/cu118 ;; \
    esac && \
    /opt/conda/bin/conda clean -ya

# CUDA kernels builder image
# Starts from the pytorch-install stage, so /opt/conda with PyTorch is already present.
FROM pytorch-install AS kernel-builder

# Install the Ninja build tool; the apt index is removed in the same layer.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ninja-build \
    && rm -rf /var/lib/apt/lists/*

# Install the cuda 11.8 package from the nvidia/label/cuda-11.8.0 conda channel.
# -y answers conda's confirmation prompt so the build never waits for input;
# the conda caches are cleaned in the same layer.
RUN /opt/conda/bin/conda install -y -c "nvidia/label/cuda-11.8.0" cuda==11.8 && \
    /opt/conda/bin/conda clean -ya
Expand All @@ -108,13 +108,12 @@ COPY server/Makefile-flash-att-v2 Makefile
# Build specific version of flash attention v2
# (runs the build-flash-attention-v2 target of the Makefile in the current working directory)
RUN make build-flash-attention-v2

# Build Transformers exllama and exllamav2 kernels
FROM kernel-builder as exllama-kernels-builder

WORKDIR /usr/src

# NOTE(review): both kernel source trees are copied into the same directory
# (/usr/src). If they contain files with the same name (e.g. setup.py), the
# second COPY replaces the first — confirm which COPY lines are intended here.
COPY server/exllama_kernels/ .

COPY server/exllamav2_kernels/ .

# Build the kernel sources copied above through their setup.py.
# TORCH_CUDA_ARCH_LIST selects the CUDA architectures to compile for
# (8.0, and 8.6 with PTX).
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
Expand Down Expand Up @@ -167,11 +166,11 @@ ENV HUGGINGFACE_HUB_CACHE=/data \
WORKDIR /usr/src

# Install the system packages needed in this stage; the apt index is removed
# in the same layer so it does not end up in the image.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        libssl-dev \
        make \
        sudo \
    && rm -rf /var/lib/apt/lists/*

# Copy conda with PyTorch installed
# (reuses the /opt/conda tree produced by the pytorch-install stage instead of reinstalling it here)
COPY --from=pytorch-install /opt/conda /opt/conda
Expand Down Expand Up @@ -217,9 +216,9 @@ COPY --from=builder /usr/src/target/release/lorax-router /usr/local/bin/lorax-ro
# Copy the lorax-launcher release binary from the builder stage into /usr/local/bin
COPY --from=builder /usr/src/target/release/lorax-launcher /usr/local/bin/lorax-launcher

# Install a C/C++ compiler toolchain in this stage; the apt index is removed
# in the same layer so it does not end up in the image.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        g++ \
    && rm -rf /var/lib/apt/lists/*


# Final image
Expand Down
71 changes: 0 additions & 71 deletions server/exllama_kernels/exllama_kernels/cuda_buffers.cu

This file was deleted.

52 changes: 0 additions & 52 deletions server/exllama_kernels/exllama_kernels/cuda_buffers.cuh

This file was deleted.

61 changes: 0 additions & 61 deletions server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cu

This file was deleted.

19 changes: 0 additions & 19 deletions server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cuh

This file was deleted.

Loading

0 comments on commit 8347f58

Please sign in to comment.