# start and stop GCP instances
name: synthetic-consumers

resources:
  cloud: gcp
  cpus: 1+
  accelerators: L4:1
  ports:
    # Ports for the Ray head node and worker nodes
    - 6383  # GCS server (Ray head node port)
    - 8263  # Dashboard port (optional, if --include-dashboard is true)
    - 50001 # Ray client server port
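    # Note (assumption): with 50001 open, a Ray client can reach this cluster at
    # ray://<instance-ip>:50001, which is presumably what the connect helper in `run:` below uses.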

num_nodes: 1
envs:
  LLM_DEVICE: CUDA # CPU or CUDA

# this will be synced to the node as `~/sky_workdir`
workdir: ./
# The setup command. Will be run under the working directory.
setup: |
  set -e # Exit if any command fails.

  # install pixi and project dependencies
  curl -fsSL https://pixi.sh/install.sh | bash
  source /home/gcpuser/.bashrc
  pixi install --manifest-path pyproject.toml -e ray

  # install system requirements needed for CPU-based vLLM inference
  if [ "${LLM_DEVICE}" == "CPU" ]; then
    echo "THIS FEATURE IS NOT IMPLEMENTED YET. Please set envs: LLM_DEVICE to CUDA" >&2 # Print error message to stderr
    exit 1 # Exit with status code 1

    sudo apt-get install -y libssl-dev
    sudo mkdir /opt/vllm && sudo chown gcpuser /opt/vllm
    git clone https://github.com/vllm-project/vllm.git /opt/vllm && cd /opt/vllm && git fetch --all --tags && git checkout tags/v0.6.2

    # Build vLLM for CPU inside a Docker environment. This saves us a lot of hassle with the >1-year-old Google Deep Learning base images.
    echo "NOTICE: Building vLLM - this can take **up to an hour** on a minimal compute instance. Switch to a stronger instance, or better, use a GPU instance to avoid this step altogether."
    # FIXME: this builds wheels for Python 3.10, but we need them for 3.12
    cd /opt/vllm && DOCKER_BUILDKIT=1 docker build -f Dockerfile.cpu -t vllm-cpu-env --shm-size=4g .
    # TODO: copy the wheels from /workspace/vllm/build/ to the local filesystem and install them (see the sketch below)
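    # Sketch for the TODO above (untested): export the build artifacts from the image with `docker cp`;
    # the path inside the image (/workspace/vllm/build/) is taken from the TODO and may differ.
    #   id=$(docker create vllm-cpu-env)
    #   docker cp "${id}":/workspace/vllm/build/ /opt/vllm/build/
    #   docker rm "${id}"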

    # /* REMOVE
    pixi run \
      --environment ray \
      --manifest-path pyproject.toml \
      pip3 install wheel packaging ninja "setuptools>=49.4.0" numpy setuptools-scm

    # build torch for CPU
    pixi run \
      --environment ray \
      --manifest-path pyproject.toml \
      pip3 install torch --index-url https://download.pytorch.org/whl/cpu # torch CPU

    # build the vllm torch integration
    VLLM_TARGET_DEVICE=cpu pixi run \
      --environment ray \
      --manifest-path pyproject.toml \
      bash -c "cd /opt/vllm/ && python setup.py install" # vllm setup is required for CPU
    #
    # REMOVE END */
  fi

  # FIXME: check why the ray client is not installed by pixi; the setup looks correct according to https://pixi.sh/latest/reference/project_configuration/#version-specification
  pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    pip3 install "ray[default,client]==2.37.0" "huggingface_hub[hf_transfer]"
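  # For the FIXME above, a pyproject.toml entry along these lines should pull in the client extra
  # via pixi (sketch only, assuming the `ray` environment is defined as a pixi feature in pyproject.toml):
  #   [tool.pixi.feature.ray.pypi-dependencies]
  #   ray = { version = "==2.37.0", extras = ["default", "client"] }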

  pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    pip3 install --force-reinstall "torch"

  # start a separate ray cluster for pymc-server
  # TODO: launch the head-only command only on the first node in a multi-node setup (see the sketch after this command)
  pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    ray start \
      --head \
      --port=6383 \
      --ray-client-server-port=50001 \
      --dashboard-host=0.0.0.0 \
      --dashboard-port=8263 \
      --disable-usage-stats
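  # Sketch for the multi-node TODO above (untested; assumes this task runs under SkyPilot, as the
  # ~/sky_workdir comment suggests): SkyPilot exposes SKYPILOT_NODE_RANK and SKYPILOT_NODE_IPS,
  # so the head/worker split could look roughly like this:
  #   HEAD_IP=$(echo "${SKYPILOT_NODE_IPS}" | head -n1)
  #   if [ "${SKYPILOT_NODE_RANK}" == "0" ]; then
  #     ray start --head --port=6383 --ray-client-server-port=50001 --disable-usage-stats
  #   else
  #     ray start --address="${HEAD_IP}:6383" --disable-usage-stats
  #   fi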

  # Download the model early. It goes to ~/.cache/huggingface ; all HF-compatible libraries will look for the model there.
  echo "Downloading your model - depending on the size of the model this may take a while"
  HF_HUB_ENABLE_HF_TRANSFER=1 pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    huggingface-cli download microsoft/Phi-3-mini-4k-instruct

  # TODO: download the model from HF via a MODEL_NAME env var (might need HF_HUB_TOKEN)
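  # Sketch for the TODO above (MODEL_NAME and HF_HUB_TOKEN are assumed, not yet defined under `envs:`):
  #   HF_HUB_ENABLE_HF_TRANSFER=1 HF_TOKEN="${HF_HUB_TOKEN}" pixi run \
  #     --environment ray \
  #     --manifest-path pyproject.toml \
  #     huggingface-cli download "${MODEL_NAME}"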

# The command to run. Will be run under the working directory.
run: |
  set -e # Exit if any command fails.
  echo "Available models on this instance:"
  # list locally available models
  HF_HUB_ENABLE_HF_TRANSFER=1 pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    huggingface-cli scan-cache

  echo "Your instance is ready. Connect to it with pymc_server.connect(IP-ADDRESS). To find the IP-ADDRESS, run 'pymcs status', note the NODE-NAME, then run 'pymcs status --ip NODE-NAME' to print the IP."