Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/solver_judge/solver_judge_flow_colab.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
},
"outputs": [],
"source": [
"!pip install \"transformers[hf_xet]>=4.51.0\" accelerate datasets peft hf-transfer \\\n",
"!pip install \"transformers[hf_xet]>=4.57.0\" accelerate datasets peft hf-transfer \\\n",
" \"numpy<2.0.0\" \"pyarrow>=15.0.0\" pandas \\\n",
" ray[default] codetiming hydra-core pylatexenc qwen-vl-utils wandb dill pybind11 liger-kernel mathruler \\\n",
" pytest py-spy pyext pre-commit ruff tensorboard\n",
Expand Down
7 changes: 4 additions & 3 deletions rllm/trainer/verl/agent_workflow_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,16 @@ def __init__(
self,
config,
tokenizer,
role_worker_mapping: dict[Role, WorkerType],
resource_pool_manager: ResourcePoolManager,
processor=None,
role_worker_mapping: dict[Role, WorkerType] = None,
resource_pool_manager: ResourcePoolManager = None,
ray_worker_group_cls: RayWorkerGroup = RayWorkerGroup,
reward_fn=None,
val_reward_fn=None,
workflow_class=None,
workflow_args=None,
):
super().__init__(config=config, tokenizer=tokenizer, role_worker_mapping=role_worker_mapping, resource_pool_manager=resource_pool_manager, ray_worker_group_cls=ray_worker_group_cls, reward_fn=reward_fn, val_reward_fn=val_reward_fn)
super().__init__(config=config, tokenizer=tokenizer, processor=processor, role_worker_mapping=role_worker_mapping, resource_pool_manager=resource_pool_manager, ray_worker_group_cls=ray_worker_group_cls, reward_fn=reward_fn, val_reward_fn=val_reward_fn)

self.workflow_class = workflow_class
self.workflow_args = workflow_args or {}
Expand Down
6 changes: 4 additions & 2 deletions rllm/trainer/verl/agent_workflow_trainer_fireworks.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ def __init__(
self,
config,
tokenizer,
role_worker_mapping: dict[Role, WorkerType],
resource_pool_manager: ResourcePoolManager,
processor=None,
role_worker_mapping: dict[Role, WorkerType] = None,
resource_pool_manager: ResourcePoolManager = None,
ray_worker_group_cls: RayWorkerGroup = RayWorkerGroup,
reward_fn=None,
val_reward_fn=None,
Expand All @@ -48,6 +49,7 @@ def __init__(
super().__init__(
config=config,
tokenizer=tokenizer,
processor=processor,
role_worker_mapping=role_worker_mapping,
resource_pool_manager=resource_pool_manager,
ray_worker_group_cls=ray_worker_group_cls,
Expand Down
5 changes: 3 additions & 2 deletions rllm/trainer/verl/train_workflow_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,12 @@ def run(self, config, workflow_class=None, workflow_args=None):
local_path = copy_to_local(config.actor_rollout_ref.model.path, use_shm=config.actor_rollout_ref.model.get("use_shm", False))

# Instantiate the tokenizer and processor.
from verl.utils import hf_tokenizer
from verl.utils import hf_processor, hf_tokenizer

trust_remote_code = config.data.get("trust_remote_code", False)
tokenizer = hf_tokenizer(local_path, trust_remote_code=trust_remote_code)
# The processor is used for multimodal LLMs; it could be None.
# processor = hf_processor(local_path, trust_remote_code=trust_remote_code, use_fast=True)
processor = hf_processor(local_path, trust_remote_code=trust_remote_code, use_fast=True)

# Define worker classes based on the actor strategy.
if config.actor_rollout_ref.actor.strategy in {"fsdp", "fsdp2"}:
Expand Down Expand Up @@ -175,6 +175,7 @@ def run(self, config, workflow_class=None, workflow_args=None):
trainer = FireworksAgentWorkflowPPOTrainer(
config=config,
tokenizer=tokenizer,
processor=processor,
role_worker_mapping=role_worker_mapping,
resource_pool_manager=resource_pool_manager,
ray_worker_group_cls=ray_worker_group_cls,
Expand Down
2 changes: 1 addition & 1 deletion scripts/install_verl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pip install --no-cache-dir "vllm==0.8.5.post1" "torch==2.6.0" "torchvision==0.21


echo "2. install basic packages"
pip install "transformers[hf_xet]>=4.51.0" accelerate datasets peft hf-transfer \
pip install "transformers[hf_xet]>=4.57.0" accelerate datasets peft hf-transfer \
"numpy<2.0.0" "pyarrow>=19.0.1" pandas \
"ray[default]" codetiming hydra-core pylatexenc qwen-vl-utils wandb dill pybind11 liger-kernel mathruler blobfile xgrammar \
pytest py-spy pyext pre-commit ruff
Expand Down
2 changes: 1 addition & 1 deletion verl
Submodule verl updated 667 files