2.5 rl0 #1004 (Draft)

Commits (74)
77f91c3
all changes from olmo3 but for olmo2.5
mnoukhov Aug 14, 2025
13c057b
example script
mnoukhov Aug 14, 2025
ee61222
fix path and uv lock
mnoukhov Aug 14, 2025
1423264
olmo2 retrofit naming
mnoukhov Aug 15, 2025
ed2ec83
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 16, 2025
a663287
updated script
mnoukhov Aug 19, 2025
abe3902
makefile delete old image
mnoukhov Aug 19, 2025
7d74b69
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 19, 2025
e5002cc
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 19, 2025
8dcf71c
resumable
mnoukhov Aug 20, 2025
f9b82f2
logging oe eval to wandb when using new oe-eval-internal
mnoukhov Aug 20, 2025
2ea2e37
fix for 4 nodes maybe
mnoukhov Aug 20, 2025
f2d6e97
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 20, 2025
93b88e2
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 21, 2025
667963b
revert change, 3 - 1 node still not working
mnoukhov Aug 21, 2025
e2925ec
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 21, 2025
98af8e3
wandb run step arg
mnoukhov Aug 21, 2025
18e3f7c
custom vllm in pyproject no need to clone
mnoukhov Aug 21, 2025
5820756
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 22, 2025
512651d
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 25, 2025
c30b5b8
Merge branch 'main' into log-oe-eval-wandb
mnoukhov Aug 25, 2025
ebcac11
vllm is extra dependency
mnoukhov Aug 26, 2025
6cbafa3
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 26, 2025
5d0e81a
Merge branch 'main' of github.com:allenai/open-instruct into log-oe-e…
mnoukhov Aug 26, 2025
1e5e1f9
make vllm a dependency either way but do local vllm as extra
mnoukhov Aug 27, 2025
24c8dc8
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 27, 2025
77048d2
back to basics, make setup to git clone
mnoukhov Aug 27, 2025
cb87b45
editable
mnoukhov Aug 27, 2025
d0b6bfc
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 28, 2025
6cac122
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 28, 2025
db7e3d4
merge in main (#962)
jacob-morrison Aug 28, 2025
3ed8657
debug script
mnoukhov Aug 29, 2025
0c432cd
smaller run on one node
mnoukhov Sep 3, 2025
782337c
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Sep 3, 2025
1c609b8
attention type fix
mnoukhov Sep 3, 2025
20354e3
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Sep 4, 2025
ef9e855
synchronous weight sync
mnoukhov Sep 5, 2025
bd28584
start generate thread trigger event
mnoukhov Sep 5, 2025
e43aa8e
single weight sync and generate thread
mnoukhov Sep 5, 2025
5aef3bd
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Sep 6, 2025
1bea281
sync weight sync
mnoukhov Sep 6, 2025
ce3fec0
cleanup
mnoukhov Sep 8, 2025
ee243ef
fix env var check
mnoukhov Sep 8, 2025
755ac15
temporary logging
mnoukhov Sep 8, 2025
782ac53
disable log stats
mnoukhov Sep 8, 2025
ad89b37
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Sep 8, 2025
54ed043
fix lock file and revert extra logging
mnoukhov Sep 8, 2025
d9dd800
un-revert weight sync
mnoukhov Sep 8, 2025
3a833c0
olmo dapo
mnoukhov Sep 8, 2025
c4497c5
olmo simple thinker
mnoukhov Sep 9, 2025
b7bd670
Merge branch 'main' into log-oe-eval-wandb
mnoukhov Sep 9, 2025
013c6b7
undo formatting
mnoukhov Sep 9, 2025
1b69161
Merge branch 'log-oe-eval-wandb' of github.com:allenai/open-instruct …
mnoukhov Sep 11, 2025
54c9a39
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Sep 11, 2025
551f58c
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Sep 14, 2025
4b3dd54
good r1-zero script and olmo simple thinker template
mnoukhov Sep 14, 2025
3f21704
2 epochs
mnoukhov Sep 14, 2025
5455f64
deepseek evals
mnoukhov Sep 15, 2025
f188425
shorter run
mnoukhov Sep 19, 2025
7177b0a
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Sep 19, 2025
e00ef62
filtering vllm top p
mnoukhov Sep 19, 2025
cfc9b8d
fix copy since we need the folder
mnoukhov Sep 21, 2025
68fe5ef
Merge branch 'olmo2-retrofit' of github.com:allenai/open-instruct int…
mnoukhov Sep 21, 2025
bec6c40
generate script
mnoukhov Sep 23, 2025
c4ec086
test run of RL 0
mnoukhov Sep 24, 2025
e85463a
Merge branch 'main' of github.com:allenai/open-instruct into 2.5-rl0
mnoukhov Sep 25, 2025
6c152b5
fix oe eval and eval on 0
mnoukhov Sep 25, 2025
3a844c1
whoami without jq
mnoukhov Sep 25, 2025
f7c572f
correct whoami
mnoukhov Sep 25, 2025
5f6c75d
simpler template
mnoukhov Sep 27, 2025
9aaae46
gpu multiplier
mnoukhov Sep 27, 2025
0c2a1c6
new hyperparams
mnoukhov Sep 27, 2025
a9527ed
actually nochat template
mnoukhov Sep 28, 2025
4a27a61
nearly there
mnoukhov Sep 28, 2025
1 change: 1 addition & 0 deletions .gitignore
@@ -159,3 +159,4 @@ dmypy.json
cache/
local_dataset_cache/
scratch/
vllm_olmo2.5/
4 changes: 3 additions & 1 deletion Dockerfile
@@ -65,6 +65,8 @@ ENV UV_CACHE_DIR=/root/.cache/uv
ENV HF_HUB_ENABLE_HF_TRANSFER=1
ENV UV_COMPILE_BYTECODE=0

RUN git clone -b shanea/olmo2-retrofit https://github.com/2015aroras/vllm.git vllm_olmo2.5

# Install dependencies
RUN --mount=type=cache,target=${UV_CACHE_DIR} \
--mount=type=bind,source=uv.lock,target=uv.lock \
@@ -78,7 +80,7 @@ COPY configs configs
COPY scripts scripts
COPY mason.py mason.py
# Copy oe-eval-internal if it exists (wildcard pattern won't fail if missing)
COPY oe-eval-interna[l] oe-eval-internal/
COPY oe-eval-internal oe-eval-internal
COPY open_instruct open_instruct

# Add build arguments for git information
12 changes: 11 additions & 1 deletion Makefile
@@ -1,4 +1,4 @@
.PHONY: style quality
.PHONY: style quality docker

# make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
export PYTHONPATH = open_instruct
@@ -16,3 +16,13 @@ style-check: ## *fail* if anything needs rewriting

quality-check: ## *fail* if any rewrite was needed
uv run ruff check --exit-non-zero-on-fix $(check_dirs)

setup:
git clone -b shanea/olmo2-retrofit https://github.com/2015aroras/vllm.git vllm_olmo2.5

docker:
DOCKER_BUILDKIT=1 docker build -f Dockerfile --build-arg UV_CACHE_DIR=$(UV_CACHE_DIR) -t open_instruct_olmo2_retrofit .
# if you are internal to AI2, you can create an image like this:
$(eval beaker_user := $(shell beaker account whoami --format json | jq -r '.[0].name'))
beaker image delete $(beaker_user)/open_instruct_olmo2_retrofit
beaker image create open_instruct_olmo2_retrofit -n open_instruct_olmo2_retrofit -w ai2/$(beaker_user)
33 changes: 33 additions & 0 deletions generate_olmo25.sh
@@ -0,0 +1,33 @@
#!/bin/bash

MODEL_NAME_OR_PATH="/weka/oe-training-default/ai2-llm/checkpoints/tylerr/long-context/olmo25_7b_lc_64k_6T_M100B_round5-sparkle_6634-pre_s2pdf_gzip2080_cweN-yake-all-olmo_packing_yarn-fullonly_50B-fb13a737/step11921-hf"
# DATASET="mnoukhov/DAPO-Math-14k-Processed-RLVR"
DATASET="TTTXXX01/MATH_3000_Filtered"
EXP_NAME="generate_olmo25_teng3k"

python mason.py \
--task_name ${EXP_NAME} \
--cluster ai2/jupiter \
--image ${1:-michaeln/open_instruct_olmo2_retrofit} \
--workspace ai2/tulu-thinker \
--priority high \
--pure_docker_mode \
--preemptible \
--gpus 2 \
--num_nodes 1 \
--max_retries 0 \
--budget ai2/oe-adapt \
--env VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
--env VLLM_ATTENTION_BACKEND="FLASH_ATTN" \
-- \
python scripts/data/rlvr/filtering_vllm.py \
--model $MODEL_NAME_OR_PATH \
--dataset $DATASET \
--split train \
--temperature 0.7 \
--top_p 0.95 \
--offset 0 \
--size 100000 \
--chat_template olmo_thinker_r1_style_nochat \
--output-file filtered_datasets/olmo25_7b_lc_dapo.jsonl \
--number_samples 16
27 changes: 27 additions & 0 deletions open_instruct/dataset_transformation.py
@@ -442,6 +442,33 @@ def visualize_token_role(tokens: list[int], masks: list[int], tokenizer: PreTrai
"{% endif %}"
"{% endfor %}"
),
"olmo_thinker_r1_style_nochat": (
"Solve the following math problem step by step. "
"Reason about the question in <think> </think> tags "
"then provide the final answer in <answer> </answer> tags "
"so the full response is <think> reasoning process here </think> "
"<answer> answer here </answer>."
"\n\n"
"{% for message in messages %}"
"{{ '\n\n' if not loop.first else '' }}"
"{{ message['content'] + '\n' }}"
"{% if loop.last and add_generation_prompt %}"
"{{ 'Solving step by step\n<think>' }}"
"{% endif %}"
"{% endfor %}"
),
"olmo_thinker_dapo": (
"Solve the following math problem step by step. "
"The last line of your response should be the answer to the problem in form Answer: $Answer (without quotes) where $Answer is the answer to the problem."
"\n\n"
"{% for message in messages %}"
"{{ '\n\n' if not loop.first else '' }}"
"{{ message['content'] + '\n' }}"
"{% if loop.last and add_generation_prompt %}"
"{{ '\nRemember to put your answer on its own line after \"Answer:\"' }}"
"{% endif %}"
"{% endfor %}"
),
# template is taken from https://arxiv.org/abs/2501.12948.
"r1_simple_chat": (
"A conversation between User and Assistant. "
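For reference, a minimal sketch of how the new `olmo_thinker_r1_style_nochat` template renders. It uses plain `jinja2` rather than the tokenizer chat-template machinery open-instruct actually routes through, and the example message is illustrative:

```python
# Hedged sketch: render the "nochat" template with jinja2 directly.
# The template string mirrors the one added above; the message is made up.
from jinja2 import Template

NOCHAT_TEMPLATE = (
    "Solve the following math problem step by step. "
    "Reason about the question in <think> </think> tags "
    "then provide the final answer in <answer> </answer> tags "
    "so the full response is <think> reasoning process here </think> "
    "<answer> answer here </answer>.\n\n"
    "{% for message in messages %}"
    "{{ '\n\n' if not loop.first else '' }}"
    "{{ message['content'] + '\n' }}"
    "{% if loop.last and add_generation_prompt %}"
    "{{ 'Solving step by step\n<think>' }}"
    "{% endif %}"
    "{% endfor %}"
)

prompt = Template(NOCHAT_TEMPLATE).render(
    messages=[{"role": "user", "content": "What is 7 * 8?"}],
    add_generation_prompt=True,
)
print(prompt)
# <preamble>
#
# What is 7 * 8?
# Solving step by step
# <think>
```

The `olmo_thinker_dapo` template renders the same way, swapping in its "Answer: $Answer" preamble and reminder line.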
5 changes: 4 additions & 1 deletion open_instruct/grpo_fast.py
@@ -405,6 +405,8 @@ class Args:
"""the max generation length for evaluation for oe-eval"""
oe_eval_beaker_image: Optional[str] = None
"""the docker image for evaluation for oe-eval"""
oe_eval_gpu_multiplier: Optional[int] = 1
"""gpu mulitplier for eval jobs"""
eval_priority: Literal["low", "normal", "high", "urgent"] = "normal"
"""the priority of auto-launched evaluation jobs"""

@@ -1224,6 +1226,7 @@ def launch_ai2_evals_on_weka_wrapper(self, step_dir, leaderboard_name, wandb_url
args.gs_bucket_path,
args.eval_priority,
args.oe_eval_beaker_image,
args.oe_eval_gpu_multiplier,
)


@@ -2366,7 +2369,7 @@ def one_training_step(
)

save_time = 0
if args.save_freq > 0 and training_step % args.save_freq == 0 and (args.eval_on_step_0 or training_step > 1):
if args.save_freq > 0 and (training_step % args.save_freq == 0 or (training_step == 1 and args.eval_on_step_0)):
with Timer("[Main Thread] 🗡️ Saving model") as timer:
checkpoint_dir = f"{args.output_dir}_checkpoints"
step_dir = os.path.join(checkpoint_dir, f"step_{training_step}")
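The condition change here is easy to misread: previously, `--eval_on_step_0` only permitted a step-1 save when step 1 already fell on a `save_freq` boundary; now step 1 saves whenever the flag is set, and periodic saves are unchanged. A hedged re-statement as a standalone sketch (not the actual code path; the argument names mirror the `Args` fields):

```python
# Standalone restatement of the new checkpoint condition, for illustration.
def should_save(training_step: int, save_freq: int, eval_on_step_0: bool) -> bool:
    # Save every `save_freq` steps, plus on step 1 when step-0 eval is requested.
    if save_freq <= 0:
        return False
    return training_step % save_freq == 0 or (training_step == 1 and eval_on_step_0)

assert should_save(1, 25, eval_on_step_0=True)       # new: step-1 checkpoint for step-0 eval
assert not should_save(1, 25, eval_on_step_0=False)  # step 1 alone is still skipped
assert should_save(50, 25, eval_on_step_0=False)     # periodic saves unchanged
```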
2 changes: 2 additions & 0 deletions open_instruct/utils.py
@@ -1145,6 +1145,7 @@ def launch_ai2_evals_on_weka(
gs_bucket_path: Optional[str] = None,
eval_priority: Optional[str] = "normal",
beaker_image: Optional[str] = None,
oe_eval_gpu_multiplier: Optional[int] = 1,
) -> None:
weka_cluster = "ai2/saturn-cirrascale ai2/neptune-cirrascale"
gcp_cluster = "ai2/augusta-google-1"
@@ -1174,6 +1175,7 @@

command = f"""\
python scripts/submit_eval_jobs.py \
--gpu_multiplier {oe_eval_gpu_multiplier} \
--model_name {leaderboard_name} \
--location {path} \
--cluster {cluster} \
9 changes: 6 additions & 3 deletions pyproject.toml
@@ -19,17 +19,17 @@ dependencies = [
"nvitop>=1.4.2",
"packaging>=24.2",
"peft>=0.13.2",
"ray[default]>=2.44.1",
"ray[default]==2.46.0",
"setuptools>=75.6.0,<80.0.0",
"tensorboard>=2.18.0",
"torch>=2.7.0,<2.8",
"transformers>=4.52.4,<4.54.0", # see https://github.com/vllm-project/vllm-ascend/issues/2046
"vllm==0.9.1",
"transformers @ git+https://github.com/2015aroras/transformers.git@shanea/olmo2-retrofit",
"wandb==0.18.1",
"langdetect==1.0.9",
"immutabledict==1.2.0",
"flash-attn>=2.8.0.post2; platform_system != 'Darwin'",
"liger-kernel>=0.5.4; platform_system != 'Darwin'",
"vllm" # installed locally with git clone because otherwise errors
]

[build-system]
@@ -44,12 +44,14 @@ flash-attn = [{ requirement = "torch", match-runtime = true }]

[tool.uv.extra-build-variables]
flash-attn = { FLASH_ATTENTION_SKIP_CUDA_BUILD = "TRUE" }
vllm = { VLLM_USE_PRECOMPILED = "1" }

# pytorch related setups
[tool.uv.sources]
torch = [
{ index = "pytorch-cu128", marker = "platform_system != 'Darwin'"},
]
vllm = { path = "vllm_olmo2.5", editable = true }

[[tool.uv.index]]
name = "pytorch-cu128"
@@ -95,6 +97,7 @@ target-version = ['py310']

[tool.isort]
known_first_party = ["open_instruct"]
known-third-party = ["wandb"]
profile = "black"
src_paths = ["open_instruct"]

86 changes: 18 additions & 68 deletions scripts/data/rlvr/filtering_vllm.py
@@ -1,4 +1,4 @@
'''
"""
python mason.py \
--cluster ai2/jupiter-cirrascale-2 --image nathanl/open_instruct_auto \
--workspace ai2/tulu-thinker \
@@ -15,7 +15,8 @@
--size 100000 \
--output-file filtered_datasets/qwen2_5_openthoughts2/orz.jsonl \
--number_samples 8
'''
"""

import argparse
import json

@@ -27,65 +28,20 @@


def main():
parser = argparse.ArgumentParser(
description="Bulk-generate N samples per HF dataset record using vLLM."
)
parser.add_argument(
"--model",
required=True,
help="vLLM model ID (e.g. facebook/opt-125m)"
)
parser.add_argument(
"--dataset",
required=True,
help="HF dataset name (e.g. squad)"
)
parser = argparse.ArgumentParser(description="Bulk-generate N samples per HF dataset record using vLLM.")
parser.add_argument("--model", required=True, help="vLLM model ID (e.g. facebook/opt-125m)")
parser.add_argument("--dataset", required=True, help="HF dataset name (e.g. squad)")
parser.add_argument("--split", default="train", help="Which split to load")
parser.add_argument("--offset", type=int, required=True, help="Start index into the split")
parser.add_argument("--size", type=int, required=True, help="Number of records to process")
parser.add_argument("--output-file", default=None, help="Path for output JSONL")
parser.add_argument(
"--split",
default="train",
help="Which split to load"
)
parser.add_argument(
"--offset",
type=int,
required=True,
help="Start index into the split"
)
parser.add_argument(
"--size",
type=int,
required=True,
help="Number of records to process"
)
parser.add_argument(
"--output-file",
default=None,
help="Path for output JSONL"
)
parser.add_argument(
"--push_to_hub",
default=None,
type=str,
help="Give a dataset name to push this data to the hub."
)
parser.add_argument(
"--chat_template",
type=str,
default=None,
help="Chat template name"
)
parser.add_argument(
"--number_samples",
type=int,
default=8,
help="Number of samples to generate per record"
)
parser.add_argument(
"--temperature",
type=float,
default=1.0,
help="Sampling temperature"
"--push_to_hub", default=None, type=str, help="Give a dataset name to push this data to the hub."
)
parser.add_argument("--chat_template", type=str, default=None, help="Chat template name")
parser.add_argument("--number_samples", type=int, default=8, help="Number of samples to generate per record")
parser.add_argument("--temperature", type=float, default=1.0, help="Sampling temperature")
parser.add_argument("--top_p", type=float, default=1.0, help="Sampling temperature")
args = parser.parse_args()

# 1. Load and slice dataset
@@ -106,20 +62,14 @@ def main():
tokenizer.apply_chat_template(
sample["messages"][:-1] if len(sample["messages"]) > 1 else sample["messages"],
add_generation_prompt=True,
tokenize=False
tokenize=False,
)
for sample in subset
]
# 4. vLLM bulk generate
llm = LLM(
model=args.model,
dtype="bfloat16",
enable_prefix_caching=True
)
llm = LLM(model=args.model, dtype="bfloat16", enable_prefix_caching=True)
sampling_params = SamplingParams(
temperature=args.temperature,
n=args.number_samples,
max_tokens=32768,
temperature=args.temperature, top_p=args.top_p, n=args.number_samples, max_tokens=32768
)
outputs = llm.generate(prompts, sampling_params)

3 changes: 2 additions & 1 deletion scripts/eval/oe-eval.sh
@@ -127,7 +127,8 @@ fi
# Set wandb run path to upload to wandb if available
WANDB_ARG=""
if [[ -n "$WANDB_RUN_PATH" ]]; then
beaker_user=$(beaker account whoami --format json | jq -r '.[0].name')
beaker_user=$(beaker account whoami --format text | awk 'NR==2 {print $2}')
echo "Assuming beaker user $beaker_user"
if ! beaker secret list --workspace ai2/tulu-3-results | grep -q "${beaker_user}_WANDB_API_KEY"; then
echo "WARNING: No ${beaker_user}_WANDB_API_KEY secret found in workspace ai2/tulu-3-results."
echo "add your WANDB_API_KEY as a secret to this workspace in order to use --oe_eval_log_to_wandb"
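The jq-free lookup trades JSON parsing for an assumption about the text format: `beaker account whoami --format text` is expected to print a header row followed by a data row whose second column is the account name. A speculative Python equivalent of the same parsing, for illustration only (the CLI output shape is assumed, not verified):

```python
# Mirrors `beaker account whoami --format text | awk 'NR==2 {print $2}'`.
import subprocess

def beaker_user() -> str:
    lines = subprocess.run(
        ["beaker", "account", "whoami", "--format", "text"],
        capture_output=True, text=True, check=True,
    ).stdout.splitlines()
    return lines[1].split()[1]  # NR==2 {print $2}
```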
2 changes: 1 addition & 1 deletion scripts/train/build_image_and_launch.sh
@@ -21,7 +21,7 @@ git_branch=$(git rev-parse --abbrev-ref HEAD)
# Sanitize the branch name to remove invalid characters for Beaker names
# Beaker names can only contain letters, numbers, -_. and may not start with -
sanitized_branch=$(echo "$git_branch" | sed 's/[^a-zA-Z0-9._-]/-/g' | sed 's/^-//')
image_name=open-instruct-integration-test-${sanitized_branch}
image_name=open-instruct-integration-test-${sanitized_branch}-${git_hash}

# Build the Docker image exactly like push-image.yml does, passing git info as build args
docker build --platform=linux/amd64 \
32 changes: 27 additions & 5 deletions scripts/train/debug/grpo_fast.sh
@@ -1,3 +1,19 @@
#!/bin/bash

python mason.py \
--task_name grpo_debug_small \
--cluster ai2/augusta \
--workspace ai2/oe-adapt-code \
--priority high \
--pure_docker_mode \
--image michaeln/open_instruct_2.5-rl0 \
--preemptible \
--num_nodes 1 \
--env VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
--env VLLM_ATTENTION_BACKEND="FLASH_ATTN" \
--gpus 1 \
--budget ai2/oe-adapt \
-- \
uv run python open_instruct/grpo_fast.py \
--dataset_mixer_list ai2-adapt-dev/rlvr_gsm8k_zs 64 \
--dataset_mixer_list_splits train \
@@ -18,19 +34,25 @@ uv run python open_instruct/grpo_fast.py \
--ground_truths_key ground_truth \
--chat_template_name r1_simple_chat_postpend_think \
--learning_rate 3e-7 \
--total_episodes 200 \
--total_episodes 1600 \
--deepspeed_stage 2 \
--num_epochs 1 \
--num_learners_per_node 1 \
--vllm_tensor_parallel_size 1 \
--beta 0.01 \
--beta 0. \
--seed 3 \
--local_eval_every 1 \
--local_eval_every 25 \
--vllm_sync_backend gloo \
--vllm_gpu_memory_utilization 0.3 \
--save_traces \
--vllm_enforce_eager \
--gradient_checkpointing \
--single_gpu_mode \
--push_to_hub false \
# --with_tracking
--with_tracking \
--save_freq 25 \
--eval_on_step_0 \
--oe_eval_max_length 512 \
--try_launch_beaker_eval_jobs_on_weka True \
--oe_eval_tasks gsm8k \
--oe_eval_beaker_image michaeln/oe_eval_olmo2_retrofit \
--eval_priority high