Skip to content
This repository has been archived by the owner on May 28, 2024. It is now read-only.

Commit

Permalink
Slim down docker, fix OSS cluster config (#106)
Browse files Browse the repository at this point in the history
Closes https://github.com/anyscale/aviary/issues/92
Closes #5

---------

Signed-off-by: Antoni Baum <[email protected]>
  • Loading branch information
Yard1 authored Jun 3, 2023
1 parent 5f1fa11 commit 2f24d9d
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 26 deletions.
7 changes: 0 additions & 7 deletions deploy/_internal/backend/cluster-env.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ post_build_cmds:
pip install \
"async_timeout" \
"markdown-it-py[plugins]" \
"git+https://github.com/huggingface/diffusers.git" \
"accelerate" \
"transformers>=4.25.1" \
"datasets" \
Expand All @@ -29,14 +28,8 @@ post_build_cmds:
"bitsandbytes" \
"git+https://github.com/Yard1/DeepSpeed.git@aviary" \
"numpy<1.24" \
"pytorch-lightning" \
"ninja" \
"protobuf<3.21.0" \
"git+https://github.com/huggingface/optimum.git" \
"torchmetrics" \
"git+https://github.com/EleutherAI/lm_dataformat.git@4eec05349977071bf67fc072290b95e31c8dd836" \
"lm_eval==0.3.0" \
"tiktoken==0.1.2" \
"pybind11==2.6.2" \
"einops==0.3.0" \
"safetensors"
20 changes: 8 additions & 12 deletions deploy/ray/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,22 +1,18 @@
FROM rayproject/ray-ml:nightly-gpu
FROM rayproject/ray:nightly-cu118

ENV HF_HUB_ENABLE_HF_TRANSFER=1

RUN sudo apt-get update
RUN sudo apt-get install -y libaio-dev git-lfs awscli
RUN sudo apt-get update && sudo apt-get install -y libaio-dev git-lfs awscli && sudo rm -rf /var/lib/apt/lists/*

RUN pip install --upgrade pip
RUN pip uninstall -y ray torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric tensorflow
RUN conda install python=3.10
RUN pip install "ray[default,serve] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl"
RUN pip install --upgrade pip && pip install "ray[default,serve] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl"
RUN pip install -i https://download.pytorch.org/whl/cu118 torch torchvision torchaudio
COPY "./dist" "/home/ray/dist"
RUN cd /home/ray/dist && pip install "$(ls *.whl | head -n1)[backend]"


COPY "./deploy/ray/backend.yaml" "/home/ray/abcd"

# The build context should be the root of the repo,
# so this copies in the model definitions.
COPY "./models" "/home/ray/models"

RUN echo "Testing aviary install" && python -c "import aviary.backend"
ENV HF_HUB_ENABLE_HF_TRANSFER=1
RUN echo "Testing aviary install" && python -c "import aviary.backend"

RUN pip cache purge && conda clean -a && rm -rf ~/.cache
20 changes: 20 additions & 0 deletions deploy/ray/aviary-cluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,26 @@ docker:
image: "anyscale/aviary:latest"
container_name: "aviary"

# Every 'conda activate' call below is necessary to ensure we are in the
# Python 3.10 conda env.
setup_commands:
- echo "conda activate" >> ~/.bashrc

head_setup_commands:
- conda activate && pip install 'boto3>=1.4.8'

worker_setup_commands: []

head_start_ray_commands:
- conda activate && ray stop
- conda activate && ulimit -n 65536; ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host=0.0.0.0

worker_start_ray_commands:
- conda activate && ray stop
# We need to make sure the RAY_HEAD_IP env var is still accessible
# after 'conda activate'.
- export RAY_HEAD_IP && echo "export RAY_HEAD_IP=$RAY_HEAD_IP" >> ~/.bashrc && conda activate && ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076

available_node_types:
head_node_type:
node_config:
Expand Down
7 changes: 0 additions & 7 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
"torch>=2.0.0",
"torchaudio>=2.0.0",
"torchvision>=0.15.2",
"diffusers @ git+https://github.com/huggingface/diffusers.git",
"accelerate",
"transformers>=4.25.1",
"datasets",
Expand All @@ -35,16 +34,10 @@
"bitsandbytes",
"deepspeed @ git+https://github.com/Yard1/DeepSpeed.git@aviary",
"numpy<1.24",
"pytorch-lightning",
"ninja",
"protobuf<3.21.0",
"optimum @ git+https://github.com/huggingface/optimum.git",
"torchmetrics",
"lm_dataformat @ git+https://github.com/EleutherAI/lm_dataformat.git@4eec05349977071bf67fc072290b95e31c8dd836",
"lm_eval==0.3.0",
"tiktoken==0.1.2",
"pybind11==2.6.2",
"einops==0.3.0",
"safetensors",
"pydantic==1.10.7",
"markdown-it-py[plugins]",
Expand Down

0 comments on commit 2f24d9d

Please sign in to comment.