fix merge conflicts

allenai · Mar 27, 2024 · 7edf135 · 7edf135
2 parents ea0532b + 71f7014
commit 7edf135
Show file tree

Hide file tree

Showing 22 changed files with 8,768 additions and 17 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -62,7 +62,7 @@ jobs:
             task:
               name: Data pipeline
               run: |
-                python scripts/prepare_memmap_dataset.py test_fixtures/*.json.gz -o /tmp/c4-sample.npy --validate
+                python scripts/prepare_memmap_dataset.py test_fixtures/*.json.gz -o /tmp/c4-sample.npy --validate --ack-deprecated
 
     steps:
       - uses: actions/checkout@v3

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Added support for Grouped Query Attention.
 - Added commonsense_qa and social_iqa downstream evaluation tasks
+- Added MMLU multiple choice (A/B/C/D) 5-shot variant downstream tasks
 
 ### Changed
 
@@ -26,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Don't log garbage on nodes that aren't rank 0
 - Don't crash in the HF code when we are referring to a tokenizer in a local file
+- Fixed the size calculation for qk layer norm
 
 ## [v0.2.5](https://github.com/allenai/OLMo/releases/tag/v0.2.5) - 2024-03-06
 
@@ -35,11 +37,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Added the option to directly pass input embeddings to `OLMo` and `OLMoForCausalLM`.
 - Added support for Python 3.8.
 - Added code to throw an error if `output_attentions` is set to `True` in forward call to `OLMoForCausalLM`. This functionality hasn't been implemented yet.
-- Fixed running with data loading workers on LUMI
+- Corrected the scheme displayed in error messages that come from R2
+- Fixed running with multiple data loading workers in LUMI
 - Minor bug fix: uninitialized prompts variable
 
 ### Added
 - Added `output_hidden_states` argument and associated functionality to `OLMo` and `OLMoForCausalLM` to return model intermediate hidden states.
+- Added the ability to read from R2 the same way we read from S3
 - Added MMLU downstream evaluation tasks, with prompt variations.
 - Added support for PyTorch v2.2.
 - Added ability to show logs from all ranks

diff --git a/configs/mcli/olmo7-ablation-baseline.yaml b/configs/mcli/olmo7-ablation-baseline.yaml
@@ -0,0 +1,47 @@
+name: olmo7-ablation-baseline  # can't have "_" or "." here
+image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04  # container image the run starts from
+compute:
+  gpus: 64  # torchrun below starts 8 processes per node, so presumably 8 nodes -- TODO confirm
+  cluster: r7z2
+  gpu_type: a100_40gb
+integrations:
+  - integration_type: git_repo
+    git_repo: allenai/OLMo
+    git_branch: olmo7-ablations  # no commit is pinned: the git_commit line below is commented out
+    #git_commit: d765e8819f5b0be204c96b0b519de2372b0da729
+    pip_install: -e .[train]  # editable install of the checkout with the "train" extra
+    ssh_clone: true
+command: |-  # literal block scalar: newlines kept, final newline stripped; body is a shell script
+  pip freeze
+  mkdir -p /root/.cache/torch/
+
+  export OMP_NUM_THREADS=8
+  export LOG_FILTER_TYPE=all_ranks
+  #export OLMO_NO_SSL=1
+
+  # warm up huggingface cache
+  pushd /root/.cache
+  curl "https://storage.googleapis.com/dirkgr-public/huggingface_cache.tar.gz" | tar -xzf -
+  popd
+  export HF_DATASETS_OFFLINE=1
+
+  cd OLMo
+  
+  torchrun \
+  --master_addr $MASTER_ADDR \
+  --master_port $MASTER_PORT \
+  --nnodes $NUM_NODES \
+  --node_rank $NODE_RANK \
+  --nproc_per_node 8 \
+  scripts/train.py configs/olmo7-ablation-baseline.yaml \
+    --run_name=olmo7-ablation-baseline \
+    --wandb.name=baseline \
+    --model.flash_attention=true \
+    --fsdp.wrapping_strategy=by_block_and_size \
+    --fsdp.sharding_strategy=FULL_SHARD \
+    --save_folder=runs/ \
+    --activation_checkpointing=whole_layer \
+    --device_train_microbatch_size=3 \
+    --global_train_batch_size=6144 \
+    --wandb.group=baseline3 \
+    --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/baseline3
diff --git a/configs/mcli/olmo7-ablation-dedupedocs.yaml b/configs/mcli/olmo7-ablation-dedupedocs.yaml
@@ -0,0 +1,46 @@
+name: olmo7-ablation-dedupedocs  # can't have "_" or "." here
+image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04  # container image the run starts from
+compute:
+  gpus: 64  # torchrun below starts 8 processes per node, so presumably 8 nodes -- TODO confirm
+  cluster: r14z3p2
+  gpu_type: h100_80gb
+integrations:
+  - integration_type: git_repo
+    git_repo: allenai/OLMo
+    git_branch: olmo7-ablations  # no commit is pinned: the git_commit line below is commented out
+    #git_commit: d765e8819f5b0be204c96b0b519de2372b0da729
+    pip_install: -e .[train]  # editable install of the checkout with the "train" extra
+    ssh_clone: true
+command: |-  # literal block scalar: newlines kept, final newline stripped; body is a shell script
+  pip freeze
+  mkdir -p /root/.cache/torch/
+
+  export OMP_NUM_THREADS=8
+  export LOG_FILTER_TYPE=all_ranks
+  #export OLMO_NO_SSL=1
+
+  # warm up huggingface cache
+  pushd /root/.cache
+  curl "https://storage.googleapis.com/dirkgr-public/huggingface_cache.tar.gz" | tar -xzf -
+  popd
+  export HF_DATASETS_OFFLINE=1
+
+  cd OLMo
+  
+  torchrun \
+  --master_addr $MASTER_ADDR \
+  --master_port $MASTER_PORT \
+  --nnodes $NUM_NODES \
+  --node_rank $NODE_RANK \
+  --nproc_per_node 8 \
+  scripts/train.py configs/olmo7-ablation-dedupedocs.yaml \
+    --run_name=olmo7-ablation-dedupedocs \
+    --wandb.name=dedupedocs \
+    --model.flash_attention=true \
+    --fsdp.wrapping_strategy=by_block_and_size \
+    --fsdp.sharding_strategy=SHARD_GRAD_OP \
+    --save_folder=runs/ \
+    --device_train_microbatch_size=3 \
+    --global_train_batch_size=6144 \
+    --wandb.group=dedupedocs \
+    --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/dedupedocs
diff --git a/configs/mcli/olmo7-ablation-dolma17.yaml b/configs/mcli/olmo7-ablation-dolma17.yaml
@@ -0,0 +1,47 @@
+name: olmo7-ablation-dolma17  # can't have "_" or "." here
+image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04  # container image the run starts from
+compute:
+  gpus: 128  # torchrun below starts 8 processes per node, so presumably 16 nodes -- TODO confirm
+  cluster: r12z3
+  gpu_type: a100_40gb
+integrations:
+  - integration_type: git_repo
+    git_repo: allenai/OLMo
+    git_branch: olmo7-ablations  # no commit is pinned: the git_commit line below is commented out
+    #git_commit: d765e8819f5b0be204c96b0b519de2372b0da729
+    pip_install: -e .[train]  # editable install of the checkout with the "train" extra
+    ssh_clone: true
+command: |-  # literal block scalar: newlines kept, final newline stripped; body is a shell script
+  pip freeze
+  mkdir -p /root/.cache/torch/
+
+  export OMP_NUM_THREADS=8
+  export LOG_FILTER_TYPE=all_ranks
+  #export OLMO_NO_SSL=1
+
+  # warm up huggingface cache
+  pushd /root/.cache
+  curl "https://storage.googleapis.com/dirkgr-public/huggingface_cache.tar.gz" | tar -xzf -
+  popd
+  export HF_DATASETS_OFFLINE=1
+
+  cd OLMo
+  
+  torchrun \
+  --master_addr $MASTER_ADDR \
+  --master_port $MASTER_PORT \
+  --nnodes $NUM_NODES \
+  --node_rank $NODE_RANK \
+  --nproc_per_node 8 \
+  scripts/train.py configs/olmo7-ablation-dolma17.yaml \
+    --run_name=olmo7-ablation-dolma17 \
+    --wandb.name=dolma17 \
+    --model.flash_attention=true \
+    --fsdp.wrapping_strategy=by_block_and_size \
+    --fsdp.sharding_strategy=FULL_SHARD \
+    --save_folder=runs/ \
+    --activation_checkpointing=whole_layer \
+    --device_train_microbatch_size=3 \
+    --global_train_batch_size=6144 \
+    --wandb.group=dolma17 \
+    --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/dolma17