Skip to content

Commit

Permalink
Mcli config for v0-step_1T-warmup_true
Browse files Browse the repository at this point in the history
  • Loading branch information
dirkgr committed Mar 30, 2024
1 parent f2ce790 commit 5c01987
Showing 1 changed file with 36 additions and 0 deletions.
36 changes: 36 additions & 0 deletions configs/mcli/annealing/v0-step_1T-warmup_true.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# mcli run configuration that launches OLMo training with the
# configs/annealing/v0-step_1T-warmup_true.yaml training config.
#
# The run clones allenai/OLMo (branch dave/annealing), installs it in editable
# mode with the "train" extra, unpacks a pre-built Hugging Face cache, and then
# starts scripts/train.py through torchrun with 8 processes per node.
#
# NOTE(review): the run name below contains "_" even though the inline note says
# that is not allowed. Left unchanged here; confirm whether the platform rejects
# or silently rewrites it, and rename (e.g. to use "-") if needed.
name: v0-step_1T-warmup_true  # can't have "_" or "." here
image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04
compute:
  # 32 GPUs in total; torchrun below starts 8 processes per node
  # (presumably 4 nodes x 8 GPUs -- verify against the cluster layout).
  gpus: 32
  cluster: r15z1p1
  gpu_type: h100_80gb
integrations:
  - integration_type: git_repo
    git_repo: allenai/OLMo
    git_branch: dave/annealing
    # Uncomment to pin the checkout to an exact commit instead of the branch head.
    #git_commit: d765e8819f5b0be204c96b0b519de2372b0da729
    pip_install: -e .[train]
    ssh_clone: true
command: |-
  # Fail fast: abort on any command error, on use of an unset variable, and when
  # any stage of a pipeline fails (e.g. the cache download below), instead of
  # carrying on and starting training in a half-prepared environment.
  set -euo pipefail

  # Record the exact package versions in the run log.
  pip freeze
  mkdir -p /root/.cache/torch/
  export OMP_NUM_THREADS=8
  export LOG_FILTER_TYPE=all_ranks
  #export OLMO_NO_SSL=1

  # warm up huggingface cache
  pushd /root/.cache
  # --fail makes curl exit non-zero on an HTTP error rather than piping an error
  # page into tar.
  curl --fail "https://storage.googleapis.com/dirkgr-public/huggingface_cache_v3.tar.gz" | tar -xzf -
  popd
  # Set only after the cache has been unpacked above.
  export HF_DATASETS_OFFLINE=1

  cd OLMo
  # MASTER_ADDR, MASTER_PORT, NUM_NODES and NODE_RANK are not set in this file;
  # they are expected to come from the launch environment (with `set -u` the
  # script stops with a clear error if any of them is missing).
  torchrun \
    --master_addr "$MASTER_ADDR" \
    --master_port "$MASTER_PORT" \
    --nnodes "$NUM_NODES" \
    --node_rank "$NODE_RANK" \
    --nproc_per_node 8 \
    scripts/train.py configs/annealing/v0-step_1T-warmup_true.yaml

0 comments on commit 5c01987

Please sign in to comment.