Add Llama-3.1 experiments #35

Merged (23 commits, Jan 21, 2025)

Commits
2cd1a2f
Add llama3.1 setup
ljvmiranda921 Nov 22, 2024
5f46dfb
Update EasyLM version
ljvmiranda921 Nov 22, 2024
5906461
Add HF_TOKEN for TPU run
ljvmiranda921 Nov 22, 2024
7320599
Fix Llama-3.1 naming
ljvmiranda921 Nov 25, 2024
4450aa3
Update the name of the logger
ljvmiranda921 Jan 7, 2025
aea92b9
Modernized TPU setup scripts
ljvmiranda921 Jan 7, 2025
ae23985
Add Dockerfile and convert script for llama31
ljvmiranda921 Jan 7, 2025
47b1a3a
Use auto version of rewardbench
ljvmiranda921 Jan 9, 2025
05eeea7
Use meta-llama-3.1 chat template for rewardbench
ljvmiranda921 Jan 9, 2025
ee047ef
Migrate from cirrascale
ljvmiranda921 Jan 9, 2025
129fdff
Use tokenizer rather than some chat_template
ljvmiranda921 Jan 9, 2025
9583d02
Do not include adhoc_scripts anymore
ljvmiranda921 Jan 14, 2025
53e1970
Incorporate optimal simulated experiments
ljvmiranda921 Jan 14, 2025
867db06
Attempt parallelization
ljvmiranda921 Jan 16, 2025
3a8f012
Add 1M simulations experiment
ljvmiranda921 Jan 16, 2025
66ff967
Better logging and get max workers always
ljvmiranda921 Jan 16, 2025
d54a30f
Merge pull request #36 from allenai/scale
ljvmiranda921 Jan 17, 2025
89aa4b9
No need for adhoc_scripts ignore
ljvmiranda921 Jan 17, 2025
66304fd
Better comments
ljvmiranda921 Jan 17, 2025
f8d29c2
Update TPU setup
ljvmiranda921 Jan 17, 2025
ee86c59
Run isort on the codebase
ljvmiranda921 Jan 17, 2025
d720a14
Download scaling results
ljvmiranda921 Jan 21, 2025
7716e90
Add scripts for plotting simulation scaling (#37)
ljvmiranda921 Jan 21, 2025
188 changes: 188 additions & 0 deletions beaker/simulate_1m.yml
@@ -0,0 +1,188 @@
version: v2
budget: ai2/oe-adapt
description: "Get features using datamodel approach given many simulations"
tasks:
- name: simulate-large-1e6
image:
beaker: ljm/human-datamodel
command: ["python", "-m", "scripts.sample_best_subset"]
arguments:
- --input_path
- /source/features.jsonl
- --output_dir
- /output/
- --model_path
- /model/model.pkl
- --sampling_method
- optimal_simulated
- --budgets
- 0.25
- 0.50
- 0.75
- --n_simulations
- 1000000
- --response_a_col
- response_a
- --response_b_col
- response_b
datasets:
- mountPath: /source/
source:
beaker: ljm/helpsteer2-pref-feats
- mountPath: /model/
source:
beaker: ljm/hybrid-prefs-multipref-quad-model
result:
path: /output
resources:
gpuCount: 2
context:
priority: normal
preemptible: true
constraints:
cluster:
- ai2/ceres-cirrascale
# - ai2/saturn-cirrascale
envVars:
- name: OPENAI_API_KEY
secret: OPENAI_API_KEY
- name: TOKENIZERS_PARALLELISM
value: "false"
- name: simulate-large-1e5
image:
beaker: ljm/human-datamodel
command: ["python", "-m", "scripts.sample_best_subset"]
arguments:
- --input_path
- /source/features.jsonl
- --output_dir
- /output/
- --model_path
- /model/model.pkl
- --sampling_method
- optimal_simulated
- --budgets
- 0.25
- 0.50
- 0.75
- --n_simulations
- 100000
- --response_a_col
- response_a
- --response_b_col
- response_b
datasets:
- mountPath: /source/
source:
beaker: ljm/helpsteer2-pref-feats
- mountPath: /model/
source:
beaker: ljm/hybrid-prefs-multipref-quad-model
result:
path: /output
resources:
gpuCount: 1
context:
priority: normal
preemptible: true
constraints:
cluster:
- ai2/ceres-cirrascale
# - ai2/saturn-cirrascale
envVars:
- name: OPENAI_API_KEY
secret: OPENAI_API_KEY
- name: TOKENIZERS_PARALLELISM
value: "false"
- name: simulate-large-1e4
image:
beaker: ljm/human-datamodel
command: ["python", "-m", "scripts.sample_best_subset"]
arguments:
- --input_path
- /source/features.jsonl
- --output_dir
- /output/
- --model_path
- /model/model.pkl
- --sampling_method
- optimal_simulated
- --budgets
- 0.25
- 0.50
- 0.75
- --n_simulations
- 10000
- --response_a_col
- response_a
- --response_b_col
- response_b
datasets:
- mountPath: /source/
source:
beaker: ljm/helpsteer2-pref-feats
- mountPath: /model/
source:
beaker: ljm/hybrid-prefs-multipref-quad-model
result:
path: /output
resources:
gpuCount: 1
context:
priority: normal
preemptible: true
constraints:
cluster:
- ai2/ceres-cirrascale
# - ai2/saturn-cirrascale
envVars:
- name: OPENAI_API_KEY
secret: OPENAI_API_KEY
- name: TOKENIZERS_PARALLELISM
value: "false"
- name: simulate-large-1e3
image:
beaker: ljm/human-datamodel
command: ["python", "-m", "scripts.sample_best_subset"]
arguments:
- --input_path
- /source/features.jsonl
- --output_dir
- /output/
- --model_path
- /model/model.pkl
- --sampling_method
- optimal_simulated
- --budgets
- 0.25
- 0.50
- 0.75
- --n_simulations
- 1000
- --response_a_col
- response_a
- --response_b_col
- response_b
datasets:
- mountPath: /source/
source:
beaker: ljm/helpsteer2-pref-feats
- mountPath: /model/
source:
beaker: ljm/hybrid-prefs-multipref-quad-model
result:
path: /output
resources:
gpuCount: 1
context:
priority: normal
preemptible: true
constraints:
cluster:
- ai2/ceres-cirrascale
# - ai2/saturn-cirrascale
envVars:
- name: OPENAI_API_KEY
secret: OPENAI_API_KEY
- name: TOKENIZERS_PARALLELISM
value: "false"
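
The four tasks in beaker/simulate_1m.yml differ only in --n_simulations (1e6 down to 1e3) and in gpuCount for the largest run; the image, arguments, mounts, cluster constraints, and env vars are repeated verbatim. As a minimal sketch (not part of this PR, assuming only PyYAML is available), the repeated blocks could be generated from one template so the variants cannot drift apart:

import yaml  # assumption: PyYAML is installed

def make_task(n_simulations: int, gpu_count: int) -> dict:
    """Build one simulate-large-* task, mirroring the YAML above."""
    exp = f"1e{len(str(n_simulations)) - 1}"  # 1_000_000 -> "1e6", 1_000 -> "1e3"
    return {
        "name": f"simulate-large-{exp}",
        "image": {"beaker": "ljm/human-datamodel"},
        "command": ["python", "-m", "scripts.sample_best_subset"],
        "arguments": [
            "--input_path", "/source/features.jsonl",
            "--output_dir", "/output/",
            "--model_path", "/model/model.pkl",
            "--sampling_method", "optimal_simulated",
            "--budgets", 0.25, 0.50, 0.75,
            "--n_simulations", n_simulations,
            "--response_a_col", "response_a",
            "--response_b_col", "response_b",
        ],
        "datasets": [
            {"mountPath": "/source/", "source": {"beaker": "ljm/helpsteer2-pref-feats"}},
            {"mountPath": "/model/", "source": {"beaker": "ljm/hybrid-prefs-multipref-quad-model"}},
        ],
        "result": {"path": "/output"},
        "resources": {"gpuCount": gpu_count},
        "context": {"priority": "normal", "preemptible": True},
        "constraints": {"cluster": ["ai2/ceres-cirrascale"]},
        "envVars": [
            {"name": "OPENAI_API_KEY", "secret": "OPENAI_API_KEY"},
            {"name": "TOKENIZERS_PARALLELISM", "value": "false"},
        ],
    }

spec = {
    "version": "v2",
    "budget": "ai2/oe-adapt",
    "description": "Get features using datamodel approach given many simulations",
    "tasks": [make_task(n, g) for n, g in [(1_000_000, 2), (100_000, 1), (10_000, 1), (1_000, 1)]],
}
print(yaml.safe_dump(spec, sort_keys=False))

Defining the spec this way keeps the shared mounts, constraints, and env vars in a single place.
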
3 changes: 2 additions & 1 deletion evals/convert.Dockerfile
@@ -54,7 +54,8 @@ ENV PATH="/usr/local/google-cloud-sdk/bin:${PATH}"

# Clone EasyLM repository
RUN git clone https://github.com/hamishivi/EasyLM.git . && \
git checkout bc241782b67bbe926e148ec9d2046d76b7ba58c8
# git checkout bc241782b67bbe926e148ec9d2046d76b7ba58c8
git checkout dbf2212c1775b2762f7108d62c8c8b01b52ea4aa

COPY ai2-allennlp-79f5e3a8e95a.json /root/.config/gcloud/application_default_credentials.json
# Set environment variable for Google Cloud
23 changes: 14 additions & 9 deletions evals/convert_to_hf.py
@@ -125,7 +125,10 @@ def main():
)
pytorch_dir = Path(args.pytorch_dir)
for params_path in params_paths:
experiment_name = params_path.parent.stem.split("--")[0]
if "llama" in str(params_path):
experiment_name = params_path.parts[-2].replace(".", "-").split("--")[0]
else:
experiment_name = params_path.parent.stem.split("--")[0]
if args.prefix:
experiment_name = f"{args.prefix}-{experiment_name}"
output_dir = pytorch_dir / experiment_name
@@ -250,28 +253,30 @@ def create_beaker_experiment_spec(
tasks=[
TaskSpec(
name=f"evaluate-{experiment_name}",
image=ImageSource(beaker="nathanl/rb_v16"),
image=ImageSource(beaker="nathanl/rewardbench_auto"),
constraints=Constraints(
cluster=[
"ai2/allennlp-cirrascale",
# "ai2/jupiter-cirrascale-2",
"ai2/saturn-cirrascale",
"ai2/ceres-cirrascale",
"ai2/jupiter-cirrascale-2",
]
),
context=TaskContext(priority="normal", preemptible=True),
result=ResultSpec(path="/output"),
command=["/bin/sh", "-c"],
arguments=[
"python scripts/run_rm.py --model /reward_model --tokenizer /reward_model --batch_size 8 --chat_template tulu --trust_remote_code --do_not_save"
"python scripts/run_rm.py --model /reward_model --tokenizer /reward_model --batch_size 8 --trust_remote_code --do_not_save"
],
datasets=[
DataMount(
source=DataSource(beaker=dataset_name),
mount_path="/reward_model",
),
DataMount(
source=DataSource(host_path="/net/nfs.cirrascale"),
mount_path="/net/nfs.cirrascale",
),
# There's no more NFS but we'll keep this here for posterity
# DataMount(
# source=DataSource(host_path="/net/nfs.cirrascale"),
# mount_path="/net/nfs.cirrascale",
# ),
],
resources=TaskResources(gpu_count=1),
env_vars=[
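
The new experiment_name branch in convert_to_hf.py exists because Llama-3.1 checkpoint directories contain a dot: Path.parent.stem strips everything from the last dot onward as a file suffix, so a directory named "llama3.1-..." collapses to "llama3". Taking parts[-2] and replacing dots with dashes keeps the full name. A small worked example (the checkpoint path is made up; only the string handling mirrors the diff):

from pathlib import Path

# Hypothetical checkpoint path, for illustration only.
params_path = Path("bucket/llama3.1-8b-hs2p-256--abc123/streaming_params")

# Old behaviour: .stem treats ".1-8b-hs2p-256--abc123" as a suffix and drops it.
print(params_path.parent.stem.split("--")[0])  # -> "llama3"

# New behaviour for llama paths: keep the directory name, swap dots for dashes.
if "llama" in str(params_path):
    experiment_name = params_path.parts[-2].replace(".", "-").split("--")[0]
else:
    experiment_name = params_path.parent.stem.split("--")[0]
print(experiment_name)  # -> "llama3-1-8b-hs2p-256"
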
40 changes: 40 additions & 0 deletions evals/templates/template-llama31.yml
@@ -0,0 +1,40 @@
version: v2
budget: ai2/oe-adapt
description: "Convert model to pytorch and launch a rewardbench eval job"
tasks:
- name: template
image:
beaker: ljm/easylm-convert-llama
command: ["python", "convert_to_hf.py"]
arguments:
- --gcs_bucket
- ljm-dev
- --batch_size
- 1
- --tokenizer_path
- meta-llama/Llama-3.1-8B
- --model_size
- 8b31
result:
path: /output
resources:
gpuCount: 1
context:
priority: normal
preemptible: true
constraints:
cluster:
- ai2/ceres-cirrascale
- ai2/saturn-cirrascale
- ai2/jupiter-cirrascale-2
envVars:
- name: OPENAI_API_KEY
secret: OPENAI_API_KEY
- name: GOOGLE_SERVICE_ACCOUNT
secret: GOOGLE_SERVICE_ACCOUNT
- name: BEAKER_TOKEN
secret: BEAKER_TOKEN
- name: TOKENIZERS_PARALLELISM
value: "false"
- name: HF_TOKEN
secret: HF_TOKEN
9 changes: 7 additions & 2 deletions scripts/create_tpu_single.sh
@@ -2,8 +2,13 @@ tpu_name=$1
type=$2
zone=$3
echo "Creating TPU: $tpu_name (type: $type zone: $zone)"
while ! gcloud alpha compute tpus tpu-vm create $tpu_name --accelerator-type=$type --zone=$zone --project=ai2-tpu --version=v2-alpha --preemptible; do sleep 60; done
while ! gcloud alpha compute tpus tpu-vm create $tpu_name --accelerator-type=$type --zone=$zone --project=ai2-tpu --version=v2-alpha; do sleep 60; done
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="git clone https://github.com/hamishivi/easylm.git"
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="cd easylm; git checkout bc241782b67bbe926e148ec9d2046d76b7ba58c8 .; ./scripts/tpu_vm_setup.sh"
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="cd easylm; git checkout dbf2212c1775b2762f7108d62c8c8b01b52ea4aa .; ./scripts/tpu_vm_setup.sh"
# gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="cd easylm; git checkout bc241782b67bbe926e148ec9d2046d76b7ba58c8 .; ./scripts/tpu_vm_setup.sh"
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="python3 -m pip install wandb --upgrade"
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="python3 -m wandb login $WANDB_TOKEN"
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="python3 -m pip install -U 'huggingface_hub[cli]'"
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="echo 'export PATH=\$PATH:~/.local/bin' >> ~/.bashrc"
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="source ~/.bashrc"
gcloud alpha compute tpus tpu-vm ssh $tpu_name --zone=$zone --project=ai2-tpu --worker=all --command="~/.local/bin/huggingface-cli login --token $HF_TOKEN"
11 changes: 11 additions & 0 deletions scripts/experiment_optimal_simulated.sh
@@ -0,0 +1,11 @@
for i in 256 512 1024 2048 4096 8192; do
python3 -m scripts.sample_best_subset \
--input_path data/hs2p_all_features/features.jsonl \
--output_dir data/hs2p_best_mixes_optimal_simulated_$i \
--model_path data/multipref_quadratic_model/model.pkl \
--sampling_method optimal_simulated \
--budgets 0.25 0.50 0.75 \
--n_simulations $i \
--response_a_col response_a \
--response_b_col response_b
done
5 changes: 5 additions & 0 deletions scripts/experiment_scaling_download.sh
@@ -0,0 +1,5 @@
for i in 256 512 1024 2048 4096 8192; do
python3 scripts/fetch_evals_rewardbench.py \
--output_path data/hs2p-$i-results-llama.csv \
--experiment_prefix rm-eval-hs2p-$i-llama3
done
23 changes: 19 additions & 4 deletions scripts/get_count_feats.py
@@ -1,9 +1,11 @@
import argparse
import json
import logging
import os
import random
import sys
import uuid
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Optional

@@ -94,7 +96,8 @@ def generate_instances(
tags = []
uuids = [uuid.uuid4().hex for _ in range(len(budgets))]
budget_instances: dict[str, dict[str, int]] = {}
for id, budget in tqdm(zip(uuids, budgets), total=len(budgets)):

def process_budget(id: str, budget: int) -> str:
instances_to_swap = run_knapsack(capacity=budget, items=feat_instance_map)

tag = f"ID__{id}__SWAPS_{budget}"
@@ -111,7 +114,7 @@ def generate_instances(
)
df_swapped["is_swapped"] = df["id"].apply(lambda x: x in instances_to_swap)
annotations = df_swapped.to_dict(orient="records")
converted_annotations = []
converted_annotations: list[dict[str, Optional[str]]] = []
for annotation in annotations:
if "model_a" not in annotation:
annotation["model_a"] = ""
@@ -134,7 +137,7 @@ def generate_instances(
f.write(json.dumps(annotation) + "\n")

# Save the budget
budget_instance_map = {}
budget_instance_map: dict[str, int] = {}
swapped_ids = [eg["id"] for eg in converted_annotations if eg["is_swapped"]]
swapped_df = df[df["id"].isin(swapped_ids)].reset_index(drop=True)
for feature_str in all_features:
@@ -148,7 +151,19 @@ def generate_instances(
budget_instances[tag] = budget_instance_map

# Save the tag file to create the experiments.txt later
tags.append(f"{swaps_outfile.stem}::{counts_outfile.stem}")
return f"{swaps_outfile.stem}::{counts_outfile.stem}"

with tqdm(total=len(budgets)) as pbar:
with ThreadPoolExecutor(max_workers=None) as executor:
n_workers = executor._max_workers
logging.info(f"Running simulation on {n_workers} workers")
futures = {
executor.submit(process_budget, id, budget): id
for id, budget in zip(uuids, budgets)
}
for future in as_completed(futures):
tags.append(future.result())
pbar.update(1)

experiments_file = output_dir / "experiments.txt"
with experiments_file.open("w") as f:
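
The main change to get_count_feats.py replaces the sequential per-budget loop with a thread pool: each budget is submitted to a ThreadPoolExecutor, tags are collected with as_completed, and a tqdm bar advances as futures finish. Below is a self-contained sketch of just that wiring; process_budget here is a dummy stand-in for the real knapsack-and-swap work:

import logging
import uuid
from concurrent.futures import ThreadPoolExecutor, as_completed

from tqdm import tqdm

logging.basicConfig(level=logging.INFO)

def process_budget(id: str, budget: int) -> str:
    # Stand-in for the real work: run the knapsack, swap preferences, write files.
    return f"swaps_{id}_{budget}::counts_{id}_{budget}"

budgets = [64, 128, 256, 512]
uuids = [uuid.uuid4().hex for _ in budgets]

tags = []
with tqdm(total=len(budgets)) as pbar:
    with ThreadPoolExecutor(max_workers=None) as executor:
        # max_workers=None defaults to min(32, os.cpu_count() + 4); the diff
        # reads the private _max_workers attribute only to log that number.
        logging.info(f"Running simulation on {executor._max_workers} workers")
        futures = {
            executor.submit(process_budget, id, budget): id
            for id, budget in zip(uuids, budgets)
        }
        for future in as_completed(futures):
            tags.append(future.result())
            pbar.update(1)
print(tags)

Threads only help here to the extent the per-budget work releases the GIL (I/O, pandas or NumPy internals); for pure-Python CPU-bound work a ProcessPoolExecutor would be the usual alternative.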