Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/skypilot-org/skypilot into wait-until-endpoint-ready
Browse files Browse the repository at this point in the history
  • Loading branch information
Michaelvll committed Jul 1, 2024
2 parents c2b1a30 + 0a4b0ef commit cfc53f4
Show file tree
Hide file tree
Showing 89 changed files with 3,647 additions and 1,511 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ RUN conda install -c conda-forge google-cloud-sdk && \
rm -rf /var/lib/apt/lists/*

# Install sky
RUN pip install --no-cache-dir "skypilot[all]==0.5.0"
RUN pip install --no-cache-dir "skypilot[all]==0.6.0"
28 changes: 16 additions & 12 deletions Dockerfile_k8s
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ FROM continuumio/miniconda3:23.3.1-0
# TODO(romilb): Investigate if this image can be consolidated with the skypilot
# client image (`Dockerfile`)

ARG DEBIAN_FRONTEND=noninteractive

# Initialize conda for root user, install ssh and other local dependencies
RUN apt update -y && \
apt install gcc rsync sudo patch openssh-server pciutils nano fuse socat netcat curl -y && \
apt install git gcc rsync sudo patch openssh-server pciutils nano fuse socat netcat curl -y && \
rm -rf /var/lib/apt/lists/* && \
apt remove -y python3 && \
conda init
Expand All @@ -25,14 +27,20 @@ RUN useradd -m -s /bin/bash sky && \
# Switch to sky user
USER sky

# Set HOME environment variable for sky user
ENV HOME /home/sky

# Set current working directory
WORKDIR /home/sky

# Install SkyPilot pip dependencies preemptively to speed up provisioning time
RUN pip install wheel Click colorama cryptography jinja2 jsonschema && \
pip install networkx oauth2client pandas pendulum PrettyTable && \
pip install ray[default]==2.9.3 rich tabulate filelock && \
pip install packaging 'protobuf<4.0.0' pulp && \
pip install pycryptodome==3.12.0 && \
pip install docker kubernetes==28.1.0 && \
pip install grpcio==1.51.3 python-dotenv==1.0.1
RUN conda init && \
pip install wheel Click colorama cryptography jinja2 jsonschema networkx \
oauth2client pandas pendulum PrettyTable rich tabulate filelock packaging \
'protobuf<4.0.0' pulp pycryptodome==3.12.0 docker kubernetes==28.1.0 \
grpcio==1.51.3 python-dotenv==1.0.1 ray[default]==2.9.3 && \
curl -LO "https://dl.k8s.io/release/v1.28.11/bin/linux/amd64/kubectl" && \
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl

# Add /home/sky/.local/bin/ to PATH
RUN echo 'export PATH="$PATH:$HOME/.local/bin"' >> ~/.bashrc
Expand All @@ -43,7 +51,3 @@ COPY --chown=sky . /skypilot/sky/

# Set PYTHONUNBUFFERED=1 to have Python print to stdout/stderr immediately
ENV PYTHONUNBUFFERED=1

# Set WORKDIR and initialize conda for sky user
WORKDIR /home/sky
RUN conda init
70 changes: 36 additions & 34 deletions Dockerfile_k8s_gpu
Original file line number Diff line number Diff line change
@@ -1,46 +1,52 @@
# TODO(romilb) - The base image used here (ray) is very large (11.4GB).
# as a result, this built image is about 13.5GB. We need to pick a lighter base
# image.
FROM rayproject/ray:2.9.3-py310-gpu
# We use the cuda runtime image instead of devel image to reduce size (1.3GB vs 3.6GB)
FROM nvidia/cuda:12.1.1-runtime-ubuntu20.04

# Initialize conda for root user, install ssh and other local dependencies
ARG DEBIAN_FRONTEND=noninteractive

# Install ssh and other local dependencies
# We remove cuda lists to avoid conflicts with the cuda version installed by ray
RUN sudo rm -rf /etc/apt/sources.list.d/cuda* && \
sudo apt update -y && \
sudo apt install gcc rsync sudo patch openssh-server pciutils nano fuse unzip socat netcat curl -y && \
sudo rm -rf /var/lib/apt/lists/* && \
sudo apt remove -y python3 && \
conda init
RUN rm -rf /etc/apt/sources.list.d/cuda* && \
apt update -y && \
apt install git gcc rsync sudo patch openssh-server pciutils nano fuse unzip socat netcat curl -y && \
rm -rf /var/lib/apt/lists/*

# Setup SSH and generate hostkeys
RUN sudo mkdir -p /var/run/sshd && \
sudo sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \
sudo sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd && \
cd /etc/ssh/ && \
sudo ssh-keygen -A

# Setup new user named sky and add to sudoers. \
# Also add /opt/conda/bin to sudo path and give sky user access to /home/ray
# Also add /opt/conda/bin to sudo path and give sky user permission to run sudo without password
RUN sudo useradd -m -s /bin/bash sky && \
sudo /bin/bash -c 'echo "sky ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers' && \
sudo /bin/bash -c "echo 'Defaults secure_path=\"/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\"' > /etc/sudoers.d/sky" && \
sudo chmod -R a+rwx /home/ray
sudo /bin/bash -c "echo 'Defaults secure_path=\"/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\"' > /etc/sudoers.d/sky"

# Switch to sky user
USER sky

# Set HOME environment variable for sky user, otherwise Ray base image HOME overrides
# Set HOME environment variable for sky user
ENV HOME /home/sky

# Setup SSH and generate hostkeys
RUN sudo mkdir -p /var/run/sshd && \
sudo chmod 0755 /var/run/sshd && \
sudo sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \
sudo sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd && \
cd /etc/ssh/ && \
ssh-keygen -A
# Set current working directory
WORKDIR /home/sky

# Install SkyPilot pip dependencies
RUN pip install wheel Click colorama cryptography jinja2 jsonschema && \
pip install networkx oauth2client pandas pendulum PrettyTable && \
pip install rich tabulate filelock && \
pip install packaging 'protobuf<4.0.0' pulp && \
pip install pycryptodome==3.12.0 && \
pip install docker kubernetes==28.1.0 && \
pip install grpcio==1.51.3 python-dotenv==1.0.1
SHELL ["/bin/bash", "-c"]

# Install conda and other dependencies
# Keep the conda and Ray versions below in sync with the ones in skylet.constants
RUN curl https://repo.anaconda.com/miniconda/Miniconda3-py310_23.11.0-2-Linux-x86_64.sh -o Miniconda3-Linux-x86_64.sh && \
bash Miniconda3-Linux-x86_64.sh -b && \
eval "$(~/miniconda3/bin/conda shell.bash hook)" && conda init && conda config --set auto_activate_base true && conda activate base && \
grep "# >>> conda initialize >>>" ~/.bashrc || { conda init && source ~/.bashrc; } && \
rm Miniconda3-Linux-x86_64.sh && \
pip install wheel Click colorama cryptography jinja2 jsonschema networkx \
oauth2client pandas pendulum PrettyTable rich tabulate filelock packaging \
'protobuf<4.0.0' pulp pycryptodome==3.12.0 docker kubernetes==28.1.0 \
grpcio==1.51.3 python-dotenv==1.0.1 ray[default]==2.9.3 && \
curl -LO "https://dl.k8s.io/release/v1.28.11/bin/linux/amd64/kubectl" && \
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl

# Add /home/sky/.local/bin/ to PATH
RUN echo 'export PATH="$PATH:$HOME/.local/bin"' >> ~/.bashrc
Expand All @@ -51,7 +57,3 @@ COPY --chown=sky . /skypilot/sky/

# Set PYTHONUNBUFFERED=1 to have Python print to stdout/stderr immediately
ENV PYTHONUNBUFFERED=1

# Set WORKDIR and initialize conda for sky user
WORKDIR /home/sky
RUN conda init
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,9 @@ To learn more, see our [Documentation](https://skypilot.readthedocs.io/en/latest
<!-- Keep this section in sync with index.rst in SkyPilot Docs -->
Runnable examples:
- LLMs on SkyPilot
- [GPT-2](./llm/gpt-2/)
- [GPT-2 via `llm.c`](./llm/gpt-2/)
- [Llama 3](./llm/llama-3/)
- [Qwen](./llm/qwen/)
- [Qwen](./llm/qwen/)
- [Databricks DBRX](./llm/dbrx/)
- [Gemma](./llm/gemma/)
- [Mixtral 8x7B](./llm/mixtral/); [Mistral 7B](https://docs.mistral.ai/self-deployment/skypilot/) (from official Mistral team)
Expand Down
1 change: 0 additions & 1 deletion docs/source/_static/custom.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ document.addEventListener('DOMContentLoaded', () => {
{ selector: '.caption-text', text: 'SkyServe: Model Serving' },
{ selector: '.toctree-l1 > a', text: 'Managed Jobs' },
{ selector: '.toctree-l1 > a', text: 'Running on Kubernetes' },
{ selector: '.toctree-l1 > a', text: 'DBRX (Databricks)' },
{ selector: '.toctree-l1 > a', text: 'Ollama' },
{ selector: '.toctree-l1 > a', text: 'Llama-3 (Meta)' },
{ selector: '.toctree-l1 > a', text: 'Qwen (Alibaba)' },
Expand Down
Loading

0 comments on commit cfc53f4

Please sign in to comment.