Skip to content

Commit

Permalink
Modernized TPU setup scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
ljvmiranda921 committed Jan 7, 2025
1 parent 4450aa3 commit aea92b9
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
9 changes: 7 additions & 2 deletions scripts/create_tpu_single.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,14 @@ tpu_name=$1
# Provision a TPU VM and prepare every worker on it: clone the easylm repo,
# run its setup script, then install and log in to wandb and the Hugging Face
# CLI. Reads WANDB_TOKEN and HF_TOKEN from the local environment.
#
# NOTE(review): this text looks like a rendered diff hunk whose +/- markers
# were lost, so some adjacent active/commented-out pairs below may be the old
# and new versions of the same line -- confirm against the real file.
# NOTE(review): $tpu_name, $type and $zone are expanded unquoted throughout;
# values containing whitespace or glob characters would be split/expanded.

# Accelerator type and zone come from the 2nd and 3rd positional arguments.
type=$2
zone=$3
echo "Creating TPU: $tpu_name (type: $type zone: $zone)"
# Retry the create call every 60 seconds until gcloud exits successfully.
while ! gcloud alpha compute tpus tpu-vm create $tpu_name --accelerator-type=$type --zone=$zone --project=ai2-tpu --version=v2-alpha; do sleep 60; done
# while ! gcloud alpha compute tpus tpu-vm create $tpu_name --accelerator-type=$type --zone=$zone --project=ai2-tpu --version=v2-alpha; do sleep 60; done
# Every command below is sent with --worker=all as its own gcloud ssh call.
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="git clone https://github.com/hamishivi/easylm.git"
# gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="cd easylm; git checkout dbf2212c1775b2762f7108d62c8c8b01b52ea4aa ."
# Check out the files of the pinned commit inside the clone, then run the
# repo's TPU VM setup script.
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="cd easylm; git checkout dbf2212c1775b2762f7108d62c8c8b01b52ea4aa .; ./scripts/tpu_vm_setup.sh"
# gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="cd easylm; git checkout bc241782b67bbe926e148ec9d2046d76b7ba58c8 .; ./scripts/tpu_vm_setup.sh"
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="python3 -m pip install wandb --upgrade"
# $WANDB_TOKEN sits inside local double quotes, so it is expanded by this
# shell and the token value becomes part of the command string that is sent.
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="python3 -m wandb login $WANDB_TOKEN"
# NOTE(review): this export runs in its own ssh command; presumably it does
# not persist into the later commands -- confirm whether it has any effect.
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="export HF_TOKEN=$HF_TOKEN"
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="python3 -m pip install -U 'huggingface_hub[cli]'"
# \$PATH is escaped locally, so the literal text $PATH is appended to the
# remote ~/.bashrc rather than this machine's PATH value.
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="echo 'export PATH=\$PATH:~/.local/bin' >> ~/.bashrc"
# NOTE(review): sourcing ~/.bashrc in a standalone ssh command presumably only
# affects that one session; the login below uses the full
# ~/.local/bin path and does not rely on PATH.
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="source ~/.bashrc"
# As with the wandb login, $HF_TOKEN is expanded locally before being sent.
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="~/.local/bin/huggingface-cli login --token $HF_TOKEN"
2 changes: 1 addition & 1 deletion scripts/submit_tpu_train_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
"--log_freq=50 "
"--save_model_freq=1000 "
"--save_milestone_freq=0 "
"--load_llama_config=13b "
"--load_llama_config=8b31 "
"--update_llama_config='' "
"--load_dataset_state='' "
"--load_checkpoint='params::{ckpt_gcs_path}' "
Expand Down

0 comments on commit aea92b9

Please sign in to comment.