56 commits
77f91c3
all changes from olmo3 but for olmo2.5
mnoukhov Aug 14, 2025
13c057b
example script
mnoukhov Aug 14, 2025
ee61222
fix path and uv lock
mnoukhov Aug 14, 2025
1423264
olmo2 retrofit naming
mnoukhov Aug 15, 2025
ed2ec83
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 16, 2025
a663287
updated script
mnoukhov Aug 19, 2025
abe3902
makefile delete old image
mnoukhov Aug 19, 2025
7d74b69
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 19, 2025
e5002cc
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 19, 2025
8dcf71c
resumable
mnoukhov Aug 20, 2025
2ea2e37
fix for 4 nodes maybe
mnoukhov Aug 20, 2025
f2d6e97
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 20, 2025
93b88e2
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 21, 2025
667963b
revert change, 3 - 1 node still not working
mnoukhov Aug 21, 2025
e2925ec
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 21, 2025
18e3f7c
custom vllm in pyproject no need to clone
mnoukhov Aug 21, 2025
5820756
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 22, 2025
512651d
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 25, 2025
ebcac11
vllm is extra dependency
mnoukhov Aug 26, 2025
6cbafa3
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 26, 2025
1e5e1f9
make vllm a dependency either way but do local vllm as extra
mnoukhov Aug 27, 2025
24c8dc8
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 27, 2025
77048d2
back to basics, make setup to git clone
mnoukhov Aug 27, 2025
cb87b45
editable
mnoukhov Aug 27, 2025
d0b6bfc
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 28, 2025
6cac122
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Aug 28, 2025
db7e3d4
merge in main (#962)
jacob-morrison Aug 28, 2025
3ed8657
debug script
mnoukhov Aug 29, 2025
0c432cd
smaller run on one node
mnoukhov Sep 3, 2025
782337c
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Sep 3, 2025
1c609b8
attention type fix
mnoukhov Sep 3, 2025
20354e3
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Sep 4, 2025
ef9e855
synchronous weight sync
mnoukhov Sep 5, 2025
bd28584
start generate thread trigger event
mnoukhov Sep 5, 2025
e43aa8e
single weight sync and generate thread
mnoukhov Sep 5, 2025
5aef3bd
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Sep 6, 2025
1bea281
sync weight sync
mnoukhov Sep 6, 2025
ce3fec0
cleanup
mnoukhov Sep 8, 2025
ee243ef
fix env var check
mnoukhov Sep 8, 2025
755ac15
temporary logging
mnoukhov Sep 8, 2025
782ac53
disable log stats
mnoukhov Sep 8, 2025
ad89b37
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Sep 8, 2025
54ed043
fix lock file and revert extra logging
mnoukhov Sep 8, 2025
d9dd800
un-revert weight sync
mnoukhov Sep 8, 2025
54c9a39
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Sep 11, 2025
551f58c
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Sep 14, 2025
4b3dd54
good r1-zero script and olmo simple thinker template
mnoukhov Sep 14, 2025
3f21704
2 epochs
mnoukhov Sep 14, 2025
5455f64
deepseek evals
mnoukhov Sep 15, 2025
f188425
shorter run
mnoukhov Sep 19, 2025
7177b0a
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Sep 19, 2025
e00ef62
filtering vllm top p
mnoukhov Sep 19, 2025
cfc9b8d
fix copy since we need the folder
mnoukhov Sep 21, 2025
4e0082d
better defaults
mnoukhov Sep 25, 2025
04ee33d
Merge branch 'main' of github.com:allenai/open-instruct into olmo2-re…
mnoukhov Sep 25, 2025
7e564d1
fix beaker whoami and update warning message
mnoukhov Sep 26, 2025
1 change: 1 addition & 0 deletions .gitignore
@@ -159,3 +159,4 @@ dmypy.json
cache/
local_dataset_cache/
scratch/
vllm_olmo2.5/
4 changes: 3 additions & 1 deletion Dockerfile
@@ -65,6 +65,8 @@ ENV UV_CACHE_DIR=/root/.cache/uv
ENV HF_HUB_ENABLE_HF_TRANSFER=1
ENV UV_COMPILE_BYTECODE=0

RUN git clone -b shanea/olmo2-retrofit https://github.com/2015aroras/vllm.git vllm_olmo2.5

# Install dependencies
RUN --mount=type=cache,target=${UV_CACHE_DIR} \
--mount=type=bind,source=uv.lock,target=uv.lock \
@@ -78,7 +80,7 @@ COPY configs configs
COPY scripts scripts
COPY mason.py mason.py
# Copy oe-eval-internal if it exists (wildcard pattern won't fail if missing)
COPY oe-eval-interna[l] oe-eval-internal/
COPY oe-eval-internal oe-eval-internal
Bug: Docker Build Fails on Missing Directory

The COPY command for oe-eval-internal changed from an optional wildcard pattern to a direct copy. This removes the intended fault tolerance: if the oe-eval-internal directory is missing, the Docker build now fails instead of skipping the copy silently.
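
For reference, the optional-copy pattern this diff removes (the [l] glob matches the oe-eval-internal directory when it exists and matches nothing otherwise, so the build continues without it):

    COPY oe-eval-interna[l] oe-eval-internal/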


COPY open_instruct open_instruct

# Add build arguments for git information
12 changes: 11 additions & 1 deletion Makefile
@@ -1,4 +1,4 @@
.PHONY: style quality
.PHONY: style quality docker

# make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
export PYTHONPATH = open_instruct
@@ -16,3 +16,13 @@ style-check: ## *fail* if anything needs rewriting

quality-check: ## *fail* if any rewrite was needed
uv run ruff check --exit-non-zero-on-fix $(check_dirs)

setup:
git clone -b shanea/olmo2-retrofit https://github.com/2015aroras/vllm.git vllm_olmo2.5

docker:
DOCKER_BUILDKIT=1 docker build -f Dockerfile --build-arg UV_CACHE_DIR=$(UV_CACHE_DIR) -t open_instruct_olmo2_retrofit .
# if you are internally at AI2, you can create an image like this:
$(eval beaker_user := $(shell beaker account whoami --format json | jq -r '.[0].name'))
beaker image delete $(beaker_user)/open_instruct_olmo2_retrofit
beaker image create open_instruct_olmo2_retrofit -n open_instruct_olmo2_retrofit -w ai2/$(beaker_user)
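
If the image does not exist yet, the beaker image delete step presumably exits non-zero and aborts make on a first run; prefixing that recipe line with a dash would tell make to ignore the error (a hedged suggestion, not part of this PR):

    -beaker image delete $(beaker_user)/open_instruct_olmo2_retrofit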
18 changes: 18 additions & 0 deletions open_instruct/dataset_transformation.py
@@ -442,6 +442,24 @@ def visualize_token_role(tokens: list[int], masks: list[int], tokenizer: PreTrai
"{% endif %}"
"{% endfor %}"
),
"olmo_thinker_r1_style_nochat": (
"A conversation between user and assistant. "
"The user asks a question, and the assistant solves it. "
"The assistant first thinks and reasons about the question "
"and after thinking provides the user with the answer. "
"The reasoning process is enclosed in <think> </think> tags "
"and the answer is enclosed in <answer> </answer> tags "
"so the full response is <think> reasoning process here </think> "
"<answer> answer here </answer>."
"\n\n"
"{% for message in messages %}"
"{{ '\n\n' if not loop.first else '' }}"
"{{ message['role'].capitalize() + ': ' + message['content'] + '\n' }}"
"{% if loop.last and add_generation_prompt %}"
"{{ 'Assistant: <think>' }}"
"{% endif %}"
"{% endfor %}"
),
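For illustration, rendering a single user message such as "What is 2+2?" through this template with add_generation_prompt=True would produce roughly the following (preamble abbreviated):

    A conversation between user and assistant. [...] <answer> answer here </answer>.

    User: What is 2+2?
    Assistant: <think>
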
# template is taken from https://arxiv.org/abs/2501.12948.
"r1_simple_chat": (
"A conversation between User and Assistant. "
9 changes: 6 additions & 3 deletions pyproject.toml
@@ -19,17 +19,17 @@ dependencies = [
"nvitop>=1.4.2",
"packaging>=24.2",
"peft>=0.13.2",
"ray[default]>=2.44.1",
"ray[default]==2.46.0",
"setuptools>=75.6.0,<80.0.0",
"tensorboard>=2.18.0",
"torch>=2.7.0,<2.8",
"transformers>=4.52.4,<4.54.0", # see https://github.com/vllm-project/vllm-ascend/issues/2046
"vllm==0.9.1",
"transformers @ git+https://github.com/2015aroras/transformers.git@shanea/olmo2-retrofit",
"wandb==0.18.1",
"langdetect==1.0.9",
"immutabledict==1.2.0",
"flash-attn>=2.8.0.post2; platform_system != 'Darwin'",
"liger-kernel>=0.5.4; platform_system != 'Darwin'",
"vllm" # installed locally with git clone because otherwise errors
]

[build-system]
@@ -44,12 +44,14 @@ flash-attn = [{ requirement = "torch", match-runtime = true }]

[tool.uv.extra-build-variables]
flash-attn = { FLASH_ATTENTION_SKIP_CUDA_BUILD = "TRUE" }
vllm = { VLLM_USE_PRECOMPILED = "1" }

# pytorch related setups
[tool.uv.sources]
torch = [
{ index = "pytorch-cu128", marker = "platform_system != 'Darwin'"},
]
vllm = { path = "vllm_olmo2.5", editable = true }

[[tool.uv.index]]
name = "pytorch-cu128"
@@ -95,6 +97,7 @@ target-version = ['py310']

[tool.isort]
known_first_party = ["open_instruct"]
known-third-party = ["wandb"]
profile = "black"
src_paths = ["open_instruct"]

86 changes: 18 additions & 68 deletions scripts/data/rlvr/filtering_vllm.py
@@ -1,4 +1,4 @@
'''
"""
python mason.py \
--cluster ai2/jupiter-cirrascale-2 --image nathanl/open_instruct_auto \
--workspace ai2/tulu-thinker \
@@ -15,7 +15,8 @@
--size 100000 \
--output-file filtered_datasets/qwen2_5_openthoughts2/orz.jsonl \
--number_samples 8
'''
"""

import argparse
import json

@@ -27,65 +28,20 @@


def main():
parser = argparse.ArgumentParser(
description="Bulk-generate N samples per HF dataset record using vLLM."
)
parser.add_argument(
"--model",
required=True,
help="vLLM model ID (e.g. facebook/opt-125m)"
)
parser.add_argument(
"--dataset",
required=True,
help="HF dataset name (e.g. squad)"
)
parser = argparse.ArgumentParser(description="Bulk-generate N samples per HF dataset record using vLLM.")
parser.add_argument("--model", required=True, help="vLLM model ID (e.g. facebook/opt-125m)")
parser.add_argument("--dataset", required=True, help="HF dataset name (e.g. squad)")
parser.add_argument("--split", default="train", help="Which split to load")
parser.add_argument("--offset", type=int, required=True, help="Start index into the split")
parser.add_argument("--size", type=int, required=True, help="Number of records to process")
parser.add_argument("--output-file", default=None, help="Path for output JSONL")
parser.add_argument(
"--split",
default="train",
help="Which split to load"
)
parser.add_argument(
"--offset",
type=int,
required=True,
help="Start index into the split"
)
parser.add_argument(
"--size",
type=int,
required=True,
help="Number of records to process"
)
parser.add_argument(
"--output-file",
default=None,
help="Path for output JSONL"
)
parser.add_argument(
"--push_to_hub",
default=None,
type=str,
help="Give a dataset name to push this data to the hub."
)
parser.add_argument(
"--chat_template",
type=str,
default=None,
help="Chat template name"
)
parser.add_argument(
"--number_samples",
type=int,
default=8,
help="Number of samples to generate per record"
)
parser.add_argument(
"--temperature",
type=float,
default=1.0,
help="Sampling temperature"
"--push_to_hub", default=None, type=str, help="Give a dataset name to push this data to the hub."
)
parser.add_argument("--chat_template", type=str, default=None, help="Chat template name")
parser.add_argument("--number_samples", type=int, default=8, help="Number of samples to generate per record")
parser.add_argument("--temperature", type=float, default=1.0, help="Sampling temperature")
parser.add_argument("--top_p", type=float, default=1.0, help="Sampling temperature")
args = parser.parse_args()

# 1. Load and slice dataset
@@ -106,20 +62,14 @@ def main():
tokenizer.apply_chat_template(
sample["messages"][:-1] if len(sample["messages"]) > 1 else sample["messages"],
add_generation_prompt=True,
tokenize=False
tokenize=False,
)
for sample in subset
]
# 4. vLLM bulk generate
llm = LLM(
model=args.model,
dtype="bfloat16",
enable_prefix_caching=True
)
llm = LLM(model=args.model, dtype="bfloat16", enable_prefix_caching=True)
sampling_params = SamplingParams(
temperature=args.temperature,
n=args.number_samples,
max_tokens=32768,
temperature=args.temperature, top_p=args.top_p, n=args.number_samples, max_tokens=32768
)
outputs = llm.generate(prompts, sampling_params)

5 changes: 3 additions & 2 deletions scripts/eval/oe-eval.sh
@@ -127,10 +127,11 @@ fi
# Set wandb run path to upload to wandb if available
WANDB_ARG=""
if [[ -n "$WANDB_RUN_PATH" ]]; then
beaker_user=$(beaker account whoami --format json | jq -r '.[0].name')
beaker_user=$(beaker account whoami --format text | awk 'NR==2 {print $2}')
Bug: Username Extraction Reliability Issue

The beaker username extraction switched from robust JSON parsing (jq) to fragile text parsing (awk). The new approach depends on a specific text layout (line 2, column 2) and will break if the output format of beaker account whoami changes.
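
A hedged fallback that keeps the structured jq path but degrades to the text parsing used in this diff, assuming the two output shapes shown here (a JSON array with a name field, or a two-line text table):

    beaker_user=$(beaker account whoami --format json 2>/dev/null | jq -r '.[0].name' 2>/dev/null)
    if [[ -z "$beaker_user" || "$beaker_user" == "null" ]]; then
        beaker_user=$(beaker account whoami --format text | awk 'NR==2 {print $2}')
    fi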


echo "Using WANDB_API_KEY from ${beaker_user}"
if ! beaker secret list --workspace ai2/tulu-3-results | grep -q "${beaker_user}_WANDB_API_KEY"; then
echo "WARNING: No ${beaker_user}_WANDB_API_KEY secret found in workspace ai2/tulu-3-results."
echo "add your WANDB_API_KEY as a secret to this workspace in order to use --oe_eval_log_to_wandb"
echo "add your WANDB_API_KEY as a secret to this workspace in order to log oe-eval results to wandb"
else
WANDB_ARG=" --wandb-run-path $WANDB_RUN_PATH --gantry-secret-wandb-api-key ${beaker_user}_WANDB_API_KEY"
fi
2 changes: 1 addition & 1 deletion scripts/train/build_image_and_launch.sh
@@ -21,7 +21,7 @@ git_branch=$(git rev-parse --abbrev-ref HEAD)
# Sanitize the branch name to remove invalid characters for Beaker names
# Beaker names can only contain letters, numbers, -_. and may not start with -
sanitized_branch=$(echo "$git_branch" | sed 's/[^a-zA-Z0-9._-]/-/g' | sed 's/^-//')
image_name=open-instruct-integration-test-${sanitized_branch}
image_name=open-instruct-integration-test-${sanitized_branch}-${git_hash}

# Build the Docker image exactly like push-image.yml does, passing git info as build args
docker build --platform=linux/amd64 \
82 changes: 82 additions & 0 deletions scripts/train/rlvr/grpo_olmo25.sh
@@ -0,0 +1,82 @@
#!/bin/bash

# OLMo 2.5 model
MODEL_NAME_OR_PATH="/weka/oe-eval-default/ai2-llm/checkpoints/lucas/olmo25_7b_lc_64k_6T_M100B_round5-sparkle_6634-pre_s2pdf_gzip2080_cweN-yake-all-olmo_yarn-fullonly_50B-740666e3/step11921-hf"
GS_MODEL_NAME="olmo25_7b_lc_beta_740666e3"

# english only DAPO
DATASETS="mnoukhov/DAPO-Math-14k-Processed-RLVR 1.0"
# DATASETS="TTTXXX01/MATH_3000_Filtered 1.0"

# math evals
EVALS="minerva_math::hamish_zs_reasoning_deepseek,minerva_math_500::hamish_zs_reasoning_deepseek,aime:zs_cot_r1::pass_at_32_2024_deepseek,aime:zs_cot_r1::pass_at_32_2025_deepseek"

# AIME 2024, 2025 local evals
LOCAL_EVALS="mnoukhov/aime2024-25-rlvr 1.0 mnoukhov/aime2024-25-rlvr 1.0"
LOCAL_EVAL_SPLITS="test_2024 test_2024 test_2025 test_2025"
Bug: Duplicate Dataset Configurations Cause Redundant Evaluations

The LOCAL_EVALS and LOCAL_EVAL_SPLITS variables are configured redundantly. LOCAL_EVALS lists the same dataset twice, and LOCAL_EVAL_SPLITS duplicates test_2024 and test_2025. This causes the AIME 2024 and 2025 test sets to be evaluated twice each, leading to redundant work and potential mispairing.
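
A hedged sketch of a de-duplicated configuration, assuming datasets and splits are paired positionally (one split per dataset entry):

    LOCAL_EVALS="mnoukhov/aime2024-25-rlvr 1.0 mnoukhov/aime2024-25-rlvr 1.0"
    LOCAL_EVAL_SPLITS="test_2024 test_2025"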


# tengmath3k
EXP_NAME="grpo_tengmath3k_k16_${GS_MODEL_NAME}"
# EXP_NAME="grpo_dapo14k_${GS_MODEL_NAME}"

cluster=ai2/augusta

python mason.py \
--task_name ${EXP_NAME} \
--cluster ${cluster} \
--workspace ai2/tulu-thinker \
--priority high \
--pure_docker_mode \
--image ${1:-michaeln/open_instruct_olmo2_retrofit} \
--preemptible \
--num_nodes 4 \
--env VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
--env VLLM_ATTENTION_BACKEND="FLASH_ATTN" \
--gs_model_name $GS_MODEL_NAME \
--gpus 8 \
--budget ai2/oe-adapt \
-- \
source configs/beaker_configs/ray_node_setup.sh \&\& \
source configs/beaker_configs/code_api_setup.sh \&\& \
python open_instruct/grpo_fast.py \
--exp_name ${EXP_NAME} \
--beta 0.0 \
--num_samples_per_prompt_rollout 16 \
--num_unique_prompts_rollout 24 \
--num_mini_batches 1 \
--learning_rate 1e-6 \
--per_device_train_batch_size 1 \
--kl_estimator kl3 \
--dataset_mixer_list $DATASETS \
--dataset_mixer_list_splits train \
--dataset_mixer_eval_list $LOCAL_EVALS \
--dataset_mixer_eval_list_splits $LOCAL_EVAL_SPLITS \
--max_token_length 2048 \
--max_prompt_token_length 2048 \
--response_length 8192 \
--pack_length 32768 \
--model_name_or_path ${MODEL_NAME_OR_PATH} \
--chat_template_name olmo_thinker_r1_style_nochat \
--stop_strings "</answer>" \
--non_stop_penalty False \
--temperature 1.0 \
--total_episodes 38400 \
--deepspeed_stage 3 \
--num_learners_per_node 8 \
--vllm_num_engines 24 \
--vllm_tensor_parallel_size 1 \
--lr_scheduler_type constant \
--apply_verifiable_reward true \
--seed 1 \
--local_eval_every 50 \
--save_freq 50 \
--checkpoint_state_freq 50 \
--gradient_checkpointing \
--with_tracking \
--vllm_enable_prefix_caching \
--clip_higher 0.272 \
--mask_truncated_completions True \
--oe_eval_max_length 32000 \
--eval_priority high \
--try_launch_beaker_eval_jobs_on_weka True \
--oe_eval_tasks $EVALS \
--oe_eval_beaker_image oe-eval-beaker/oe_eval_olmo2_retrofit_auto
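
A usage sketch, assuming the optional first argument overrides the default Beaker image (per the ${1:-michaeln/open_instruct_olmo2_retrofit} expansion above):

    bash scripts/train/rlvr/grpo_olmo25.sh michaeln/open_instruct_olmo2_retrofit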