diff --git a/.github/workflows/yapf_format.yml b/.github/workflows/yapf_format.yml index c6d27c3..548df4c 100644 --- a/.github/workflows/yapf_format.yml +++ b/.github/workflows/yapf_format.yml @@ -42,4 +42,4 @@ jobs: pip install toml==0.10.2 - name: Running yapf run: | - yapf -r -vv -d --style=./.style.yapf verl tests single_controller examples + yapf -r -vv -d --style=./.style.yapf verl tests examples diff --git a/README.md b/README.md index faa3d1f..2f96ba3 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,7 @@ pip3 install yapf ``` Then, make sure you are at top level of verl repo and run ```bash -yapf -ir -vv --style ./.style.yapf verl single_controller examples +yapf -ir -vv --style ./.style.yapf verl examples ``` diff --git a/docs/advance/dpo_extension.rst b/docs/advance/dpo_extension.rst index fb7754c..592d971 100644 --- a/docs/advance/dpo_extension.rst +++ b/docs/advance/dpo_extension.rst @@ -47,8 +47,8 @@ Implementation details: .. code:: python - from single_controller.base import Worker - from single_controller.ray import RayWorkerGroup, RayClassWithInitArgs, RayResourcePool + from verl.single_controller.base import Worker + from verl.single_controller.ray import RayWorkerGroup, RayClassWithInitArgs, RayResourcePool import ray @ray.remote @@ -75,7 +75,7 @@ API: compute reference log probability .. code:: python - from single_controller.base import Worker + from verl.single_controller.base import Worker import ray @ray.remote @@ -93,7 +93,7 @@ API: Update actor model parameters .. code:: python - from single_controller.base import Worker + from verl.single_controller.base import Worker import ray @ray.remote @@ -184,7 +184,7 @@ registered into the worker_group** .. code:: python - from single_controller.base.decorator import register + from verl.single_controller.base.decorator import register def dispatch_data(worker_group, data): return data.chunk(worker_group.world_size) @@ -214,11 +214,11 @@ computation, and data collection. Furthermore, the model parallelism size of each model is usually fixed, including dp, tp, pp. So for these common distributed scenarios, we have -pre-implemented specific dispatch and collect methods,in `decorator.py `_, which can be directly used to wrap the computations. +pre-implemented specific dispatch and collect methods, in `decorator.py `_, which can be directly used to wrap the computations. .. 
code:: python - from single_controller.base.decorator import register, Dispatch + from verl.single_controller.base.decorator import register, Dispatch @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO) def generate_sequences(self, data: DataProto) -> DataProto: diff --git a/docs/examples/ppo_code_architecture.rst b/docs/examples/ppo_code_architecture.rst index bd247a2..ab1f66a 100644 --- a/docs/examples/ppo_code_architecture.rst +++ b/docs/examples/ppo_code_architecture.rst @@ -49,13 +49,13 @@ Define worker classes if config.actor_rollout_ref.actor.strategy == 'fsdp': # for FSDP backend assert config.actor_rollout_ref.actor.strategy == config.critic.strategy from verl.trainer.ppo.workers.fsdp_workers import ActorRolloutRefWorker, CriticWorker - from single_controller.ray import RayWorkerGroup + from verl.single_controller.ray import RayWorkerGroup ray_worker_group_cls = RayWorkerGroup elif config.actor_rollout_ref.actor.strategy == 'megatron': # for Megatron backend assert config.actor_rollout_ref.actor.strategy == config.critic.strategy from verl.trainer.ppo.workers.megatron_workers import ActorRolloutRefWorker, CriticWorker - from single_controller.ray.megatron import NVMegatronRayWorkerGroup + from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup ray_worker_group_cls = NVMegatronRayWorkerGroup # Ray worker class for Megatron-LM else: diff --git a/docs/workers/megatron_workers.rst b/docs/workers/megatron_workers.rst index 5a8f5ed..d6f88c3 100644 --- a/docs/workers/megatron_workers.rst +++ b/docs/workers/megatron_workers.rst @@ -40,7 +40,7 @@ We implement various APIs for each ``Worker`` class decorated by the ``@register(dispatch_mode=)`` decorator. These APIs can be called by the ray driver process. The data can be correctly collected and dispatched following the ``dispatch_mode`` of each function. The supported ``dispatch_mode`` values -(i.e., transfer protocols) can be found in `decorator.py `_. +(i.e., transfer protocols) can be found in `decorator.py `_. 
ActorRolloutRefWorker ^^^^^^^^^^^^^^^^^^^^^ diff --git a/examples/ray/tutorial.ipynb b/examples/ray/tutorial.ipynb index 9b8591a..f270cd9 100644 --- a/examples/ray/tutorial.ipynb +++ b/examples/ray/tutorial.ipynb @@ -232,8 +232,8 @@ }, "outputs": [], "source": [ - "from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, merge_resource_pool\n", - "from single_controller.base import Worker" + "from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, merge_resource_pool\n", + "from verl.single_controller.base import Worker" ] }, { @@ -437,7 +437,7 @@ }, "outputs": [], "source": [ - "from single_controller.ray.decorator import register, Dispatch, Execute" + "from verl.single_controller.ray.decorator import register, Dispatch, Execute" ] }, { @@ -518,7 +518,7 @@ }, "outputs": [], "source": [ - "from single_controller.ray.decorator import register, Dispatch, collect_all_to_all, Execute" + "from verl.single_controller.ray.decorator import register, Dispatch, collect_all_to_all, Execute" ] }, { @@ -723,10 +723,10 @@ }, "outputs": [], "source": [ - "from single_controller.ray.decorator import register, Dispatch, Execute\n", - "from single_controller.ray.megatron import NVMegatronRayWorkerGroup\n", - "from single_controller.base.megatron.worker import MegatronWorker\n", - "from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup\n", + "from verl.single_controller.ray.decorator import register, Dispatch, Execute\n", + "from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup\n", + "from verl.single_controller.base.megatron.worker import MegatronWorker\n", + "from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup\n", "from omegaconf import OmegaConf\n", "from megatron.core import parallel_state as mpu" ] diff --git a/examples/split_placement/main_ppo_split.py b/examples/split_placement/main_ppo_split.py index 524f35e..5ae4b21 100644 --- a/examples/split_placement/main_ppo_split.py +++ b/examples/split_placement/main_ppo_split.py @@ -121,13 +121,13 @@ def main_task(config): if config.actor_rollout_ref.actor.strategy == 'fsdp': assert config.actor_rollout_ref.actor.strategy == config.critic.strategy from verl.trainer.ppo.workers.fsdp_workers import ActorRolloutRefWorker, CriticWorker - from single_controller.ray import RayWorkerGroup + from verl.single_controller.ray import RayWorkerGroup ray_worker_group_cls = RayWorkerGroup elif config.actor_rollout_ref.actor.strategy == 'megatron': assert config.actor_rollout_ref.actor.strategy == config.critic.strategy from verl.trainer.ppo.workers.megatron_workers import ActorRolloutRefWorker, CriticWorker - from single_controller.ray.megatron import NVMegatronRayWorkerGroup + from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup ray_worker_group_cls = NVMegatronRayWorkerGroup else: diff --git a/examples/split_placement/split_monkey_patch.py b/examples/split_placement/split_monkey_patch.py index 70ed267..5e09377 100644 --- a/examples/split_placement/split_monkey_patch.py +++ b/examples/split_placement/split_monkey_patch.py @@ -16,7 +16,7 @@ """ import os from pprint import pprint -from single_controller.ray import RayResourcePool, RayWorkerGroup, RayClassWithInitArgs +from verl.single_controller.ray import RayResourcePool, RayWorkerGroup, RayClassWithInitArgs from verl import DataProto from verl.trainer.ppo.ray_trainer import compute_advantage, apply_kl_penalty, 
reduce_metrics, compute_data_metrics, Role, create_colocated_worker_cls from codetiming import Timer diff --git a/tests/ray/check_worker_alive/main.py b/tests/ray/check_worker_alive/main.py index 9526f1b..fcebbfe 100644 --- a/tests/ray/check_worker_alive/main.py +++ b/tests/ray/check_worker_alive/main.py @@ -18,9 +18,9 @@ import ray -from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup -from single_controller.base.worker import Worker -from single_controller.base.decorator import register, Dispatch +from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup +from verl.single_controller.base.worker import Worker +from verl.single_controller.base.decorator import register, Dispatch @ray.remote diff --git a/tests/ray/detached_worker/client.py b/tests/ray/detached_worker/client.py index 0595bcf..1773fff 100644 --- a/tests/ray/detached_worker/client.py +++ b/tests/ray/detached_worker/client.py @@ -19,8 +19,8 @@ import torch from verl import DataProto -from single_controller.ray import RayClassWithInitArgs -from single_controller.ray.megatron import NVMegatronRayWorkerGroup +from verl.single_controller.ray import RayClassWithInitArgs +from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup from tensordict import TensorDict diff --git a/tests/ray/detached_worker/server.py b/tests/ray/detached_worker/server.py index 1842f0a..c8057e3 100644 --- a/tests/ray/detached_worker/server.py +++ b/tests/ray/detached_worker/server.py @@ -25,10 +25,10 @@ from torch import nn import ray -from single_controller.ray import RayClassWithInitArgs, RayResourcePool -from single_controller.ray.megatron import NVMegatronRayWorkerGroup -from single_controller.base.megatron.worker import MegatronWorker -from single_controller.ray.decorator import register, Dispatch +from verl.single_controller.ray import RayClassWithInitArgs, RayResourcePool +from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup +from verl.single_controller.base.megatron.worker import MegatronWorker +from verl.single_controller.ray.decorator import register, Dispatch from verl import DataProto from verl.models.llama.megatron import ParallelLlamaForCausalLMRmPadPP diff --git a/tests/ray/test_colocated_workers.py b/tests/ray/test_colocated_workers.py index 0515400..96b859b 100644 --- a/tests/ray/test_colocated_workers.py +++ b/tests/ray/test_colocated_workers.py @@ -14,9 +14,9 @@ import ray -from single_controller.base import Worker -from single_controller.base.decorator import register, Dispatch -from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, create_colocated_worker_cls +from verl.single_controller.base import Worker +from verl.single_controller.base.decorator import register, Dispatch +from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, create_colocated_worker_cls from verl import DataProto diff --git a/tests/ray/test_data_transfer.py b/tests/ray/test_data_transfer.py index 480e576..46b962c 100644 --- a/tests/ray/test_data_transfer.py +++ b/tests/ray/test_data_transfer.py @@ -15,10 +15,10 @@ In this test, we instantiate a data parallel worker with 8 GPUs """ -from single_controller.base import Worker -from single_controller.ray import RayWorkerGroup, RayClassWithInitArgs, RayResourcePool +from verl.single_controller.base import Worker +from verl.single_controller.ray import RayWorkerGroup, RayClassWithInitArgs, RayResourcePool -from 
single_controller.base.decorator import Dispatch, register +from verl.single_controller.base.decorator import Dispatch, register import ray import torch diff --git a/tests/ray/test_driverfunc_to_worker.py b/tests/ray/test_driverfunc_to_worker.py index 2c7007b..ea253fd 100644 --- a/tests/ray/test_driverfunc_to_worker.py +++ b/tests/ray/test_driverfunc_to_worker.py @@ -18,9 +18,9 @@ from verl import DataProto from tensordict import TensorDict -from single_controller.base.worker import Worker -from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs -from single_controller.ray import RayWorkerGroup +from verl.single_controller.base.worker import Worker +from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs +from verl.single_controller.ray import RayWorkerGroup os.environ['RAY_DEDUP_LOGS'] = '0' os.environ['NCCL_DEBUG'] = 'WARN' diff --git a/tests/ray/test_high_level_scheduling_api.py b/tests/ray/test_high_level_scheduling_api.py index 33d0d14..2d83206 100644 --- a/tests/ray/test_high_level_scheduling_api.py +++ b/tests/ray/test_high_level_scheduling_api.py @@ -16,8 +16,8 @@ import ray -from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, merge_resource_pool -from single_controller.base.worker import Worker +from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, merge_resource_pool +from verl.single_controller.base.worker import Worker @ray.remote diff --git a/tests/ray/test_ray_local_envs.py b/tests/ray/test_ray_local_envs.py index 53bf850..542d536 100644 --- a/tests/ray/test_ray_local_envs.py +++ b/tests/ray/test_ray_local_envs.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -e2e test single_controller.ray +e2e test verl.single_controller.ray """ import os import ray -from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup -from single_controller.base.worker import Worker -from single_controller.ray.decorator import register, Dispatch, collect_all_to_all, Execute +from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup +from verl.single_controller.base.worker import Worker +from verl.single_controller.ray.decorator import register, Dispatch, collect_all_to_all, Execute @ray.remote diff --git a/tests/ray/test_remote_api.py b/tests/ray/test_remote_api.py index aa4c1c1..b7a64b6 100644 --- a/tests/ray/test_remote_api.py +++ b/tests/ray/test_remote_api.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from single_controller.remote import remote, RemoteBackend, SharedResourcePool -from single_controller.base.decorator import register, Dispatch -from single_controller.base.worker import Worker +from verl.single_controller.remote import remote, RemoteBackend, SharedResourcePool +from verl.single_controller.base.decorator import register, Dispatch +from verl.single_controller.base.worker import Worker @remote(process_on_nodes=[3], use_gpu=True, name_prefix="actor", sharing=SharedResourcePool) diff --git a/tests/ray/test_worker_group_basics.py b/tests/ray/test_worker_group_basics.py index ee1ef10..fa18e9b 100644 --- a/tests/ray/test_worker_group_basics.py +++ b/tests/ray/test_worker_group_basics.py @@ -12,15 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
""" -e2e test single_controller.ray +e2e test verl.single_controller.ray """ import torch import ray -from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup -from single_controller.base.worker import Worker -from single_controller.ray.decorator import register, Dispatch, collect_all_to_all, Execute +from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup +from verl.single_controller.base.worker import Worker +from verl.single_controller.ray.decorator import register, Dispatch, collect_all_to_all, Execute def two_to_all_dispatch_fn(worker_group, *args, **kwargs): diff --git a/tests/ray/test_worker_group_torch.py b/tests/ray/test_worker_group_torch.py index c48ed1e..13508ed 100644 --- a/tests/ray/test_worker_group_torch.py +++ b/tests/ray/test_worker_group_torch.py @@ -21,8 +21,8 @@ import torch.distributed import ray -from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup -from single_controller.base.worker import Worker +from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup +from verl.single_controller.base.worker import Worker @ray.remote diff --git a/single_controller/__init__.py b/verl/single_controller/__init__.py similarity index 100% rename from single_controller/__init__.py rename to verl/single_controller/__init__.py diff --git a/single_controller/base/__init__.py b/verl/single_controller/base/__init__.py similarity index 100% rename from single_controller/base/__init__.py rename to verl/single_controller/base/__init__.py diff --git a/single_controller/base/decorator.py b/verl/single_controller/base/decorator.py similarity index 93% rename from single_controller/base/decorator.py rename to verl/single_controller/base/decorator.py index 9544ac6..6fdacb6 100644 --- a/single_controller/base/decorator.py +++ b/verl/single_controller/base/decorator.py @@ -75,7 +75,7 @@ def dispatch_megatron_compute(worker_group, *args, **kwargs): """ User passes in dp data. The data is dispatched to all tp/pp ranks with the same dp """ - from single_controller.base.megatron.worker_group import MegatronWorkerGroup + from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup assert isinstance(worker_group, MegatronWorkerGroup), f'worker_group must be MegatronWorkerGroup, Got {type(worker_group)}' @@ -104,7 +104,7 @@ def collect_megatron_compute(worker_group, output): """ Only collect the data from the tp=0 and pp=last and every dp ranks """ - from single_controller.base.megatron.worker_group import MegatronWorkerGroup + from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup assert isinstance(worker_group, MegatronWorkerGroup) output_in_dp = [] pp_size = worker_group.get_megatron_global_info().pp_size @@ -119,7 +119,7 @@ def dispatch_megatron_compute_data_proto(worker_group, *args, **kwargs): """ All the args and kwargs must be DataProto. The batch will be chunked by dp_size and passed to each rank """ - from single_controller.base.megatron.worker_group import MegatronWorkerGroup + from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup assert isinstance(worker_group, MegatronWorkerGroup) splitted_args, splitted_kwargs = _split_args_kwargs_data_proto(worker_group.dp_size, *args, **kwargs) @@ -162,7 +162,7 @@ def dispatch_megatron_pp_as_dp(worker_group, *args, **kwargs): """ treat pp as dp. 
""" - from single_controller.base.megatron.worker_group import MegatronWorkerGroup + from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup assert isinstance(worker_group, MegatronWorkerGroup) pp_size = worker_group.pp_size @@ -210,7 +210,7 @@ def collect_megatron_pp_as_dp(worker_group, output): """ treat pp as dp. Only collect data on tp=0 """ - from single_controller.base.megatron.worker_group import MegatronWorkerGroup + from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup assert isinstance(worker_group, MegatronWorkerGroup) output_in_dp = [] for global_rank in range(worker_group.world_size): @@ -224,7 +224,7 @@ def collect_megatron_pp_only(worker_group, output): """ Only collect output of megatron pp. This is useful when examine weight names as they are identical in tp/dp """ - from single_controller.base.megatron.worker_group import MegatronWorkerGroup + from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup assert isinstance(worker_group, MegatronWorkerGroup) output_in_pp = [] for global_rank in range(worker_group.world_size): @@ -235,7 +235,7 @@ def collect_megatron_pp_only(worker_group, output): def dispatch_megatron_pp_as_dp_data_proto(worker_group, *args, **kwargs): - from single_controller.base.megatron.worker_group import MegatronWorkerGroup + from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup assert isinstance(worker_group, MegatronWorkerGroup) pp_dp_size = worker_group.dp_size * worker_group.pp_size @@ -245,7 +245,7 @@ def dispatch_megatron_pp_as_dp_data_proto(worker_group, *args, **kwargs): def collect_megatron_pp_as_dp_data_proto(worker_group, output): from verl.protocol import DataProto - from single_controller.base.megatron.worker_group import MegatronWorkerGroup + from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup assert isinstance(worker_group, MegatronWorkerGroup) output = collect_megatron_pp_as_dp(worker_group, output) @@ -253,7 +253,7 @@ def collect_megatron_pp_as_dp_data_proto(worker_group, output): def dispatch_dp_compute(worker_group, *args, **kwargs): - from single_controller.base.worker_group import WorkerGroup + from verl.single_controller.base.worker_group import WorkerGroup assert isinstance(worker_group, WorkerGroup) for arg in args: assert isinstance(arg, (Tuple, List)) and len(arg) == worker_group.world_size @@ -263,21 +263,21 @@ def dispatch_dp_compute(worker_group, *args, **kwargs): def collect_dp_compute(worker_group, output): - from single_controller.base.worker_group import WorkerGroup + from verl.single_controller.base.worker_group import WorkerGroup assert isinstance(worker_group, WorkerGroup) assert len(output) == worker_group.world_size return output def dispatch_dp_compute_data_proto(worker_group, *args, **kwargs): - from single_controller.base.worker_group import WorkerGroup + from verl.single_controller.base.worker_group import WorkerGroup assert isinstance(worker_group, WorkerGroup) splitted_args, splitted_kwargs = _split_args_kwargs_data_proto(worker_group.world_size, *args, **kwargs) return splitted_args, splitted_kwargs def dispatch_dp_compute_data_proto_with_func(worker_group, *args, **kwargs): - from single_controller.base.worker_group import WorkerGroup + from verl.single_controller.base.worker_group import WorkerGroup assert isinstance(worker_group, WorkerGroup) assert type(args[0]) == FunctionType # NOTE: The first one args is a function! 
diff --git a/single_controller/base/dp.py b/verl/single_controller/base/dp.py similarity index 96% rename from single_controller/base/dp.py rename to verl/single_controller/base/dp.py index 5d534e8..2d19188 100644 --- a/single_controller/base/dp.py +++ b/verl/single_controller/base/dp.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from single_controller.base.worker import Worker +from verl.single_controller.base.worker import Worker class DPEngineWorker(Worker): diff --git a/single_controller/base/megatron/__init__.py b/verl/single_controller/base/megatron/__init__.py similarity index 100% rename from single_controller/base/megatron/__init__.py rename to verl/single_controller/base/megatron/__init__.py diff --git a/single_controller/base/megatron/worker.py b/verl/single_controller/base/megatron/worker.py similarity index 94% rename from single_controller/base/megatron/worker.py rename to verl/single_controller/base/megatron/worker.py index 46608bb..2d84d29 100644 --- a/single_controller/base/megatron/worker.py +++ b/verl/single_controller/base/megatron/worker.py @@ -14,7 +14,7 @@ import os from dataclasses import dataclass -from single_controller.base.worker import Worker, DistRankInfo, DistGlobalInfo +from verl.single_controller.base.worker import Worker, DistRankInfo, DistGlobalInfo class MegatronWorker(Worker): diff --git a/single_controller/base/megatron/worker_group.py b/verl/single_controller/base/megatron/worker_group.py similarity index 96% rename from single_controller/base/megatron/worker_group.py rename to verl/single_controller/base/megatron/worker_group.py index 59a78ff..67c21d3 100644 --- a/single_controller/base/megatron/worker_group.py +++ b/verl/single_controller/base/megatron/worker_group.py @@ -15,7 +15,7 @@ from typing import Dict from .worker import DistRankInfo, DistGlobalInfo -from single_controller.base import ResourcePool, WorkerGroup +from verl.single_controller.base import ResourcePool, WorkerGroup class MegatronWorkerGroup(WorkerGroup): diff --git a/single_controller/base/register_center/__init__.py b/verl/single_controller/base/register_center/__init__.py similarity index 100% rename from single_controller/base/register_center/__init__.py rename to verl/single_controller/base/register_center/__init__.py diff --git a/single_controller/base/register_center/ray.py b/verl/single_controller/base/register_center/ray.py similarity index 100% rename from single_controller/base/register_center/ray.py rename to verl/single_controller/base/register_center/ray.py diff --git a/single_controller/base/worker.py b/verl/single_controller/base/worker.py similarity index 95% rename from single_controller/base/worker.py rename to verl/single_controller/base/worker.py index efd23a8..2ca961d 100644 --- a/single_controller/base/worker.py +++ b/verl/single_controller/base/worker.py @@ -17,7 +17,7 @@ import os import socket from dataclasses import dataclass -from single_controller.base.decorator import register, Dispatch +from verl.single_controller.base.decorator import register, Dispatch @dataclass @@ -43,7 +43,7 @@ def get_node_ip_by_sdk(): import ray return ray._private.services.get_node_ip_address() elif os.getenv("WG_BACKEND", None) == "torch_rpc": - from single_controller.torchrpc.k8s_client import get_ip_addr + from verl.single_controller.torchrpc.k8s_client import get_ip_addr return get_ip_addr() return None @@ -110,7 +110,7 @@ def _configure_before_init(self, register_center_name: str, rank: int): } 
if os.getenv("WG_BACKEND", None) == "ray": - from single_controller.base.register_center.ray import create_worker_group_register_center + from verl.single_controller.base.register_center.ray import create_worker_group_register_center self.register_center = create_worker_group_register_center(name=register_center_name, info=rank_zero_info) diff --git a/single_controller/base/worker_group.py b/verl/single_controller/base/worker_group.py similarity index 98% rename from single_controller/base/worker_group.py rename to verl/single_controller/base/worker_group.py index a6bc927..bd58458 100644 --- a/single_controller/base/worker_group.py +++ b/verl/single_controller/base/worker_group.py @@ -20,7 +20,7 @@ import time from typing import List, Any, Callable, Dict -from single_controller.base.decorator import MAGIC_ATTR, Dispatch, get_predefined_dispatch_fn, get_predefined_execute_fn +from verl.single_controller.base.decorator import MAGIC_ATTR, Dispatch, get_predefined_dispatch_fn, get_predefined_execute_fn class ResourcePool: diff --git a/single_controller/ray/__init__.py b/verl/single_controller/ray/__init__.py similarity index 100% rename from single_controller/ray/__init__.py rename to verl/single_controller/ray/__init__.py diff --git a/single_controller/ray/base.py b/verl/single_controller/ray/base.py similarity index 99% rename from single_controller/ray/base.py rename to verl/single_controller/ray/base.py index 2cb8148..eaa1b00 100644 --- a/single_controller/ray/base.py +++ b/verl/single_controller/ray/base.py @@ -21,7 +21,7 @@ from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy, NodeAffinitySchedulingStrategy from ray.experimental.state.api import get_actor -from single_controller.base import WorkerGroup, ResourcePool, ClassWithInitArgs, Worker +from verl.single_controller.base import WorkerGroup, ResourcePool, ClassWithInitArgs, Worker __all__ = ['Worker'] @@ -373,7 +373,7 @@ def world_size(self): """ from unittest.mock import patch -from single_controller.base.decorator import MAGIC_ATTR +from verl.single_controller.base.decorator import MAGIC_ATTR import os diff --git a/single_controller/ray/decorator.py b/verl/single_controller/ray/decorator.py similarity index 97% rename from single_controller/ray/decorator.py rename to verl/single_controller/ray/decorator.py index 006a80c..1de452f 100644 --- a/single_controller/ray/decorator.py +++ b/verl/single_controller/ray/decorator.py @@ -19,7 +19,7 @@ import ray # compatiblity cern -from single_controller.base.decorator import * +from verl.single_controller.base.decorator import * def maybe_remote(main): diff --git a/single_controller/ray/dist_data_pass_protocol.py b/verl/single_controller/ray/dist_data_pass_protocol.py similarity index 100% rename from single_controller/ray/dist_data_pass_protocol.py rename to verl/single_controller/ray/dist_data_pass_protocol.py diff --git a/single_controller/ray/dp.py b/verl/single_controller/ray/dp.py similarity index 97% rename from single_controller/ray/dp.py rename to verl/single_controller/ray/dp.py index fab4da9..b53d4b9 100644 --- a/single_controller/ray/dp.py +++ b/verl/single_controller/ray/dp.py @@ -14,7 +14,7 @@ import ray -from single_controller.ray.base import RayWorkerGroup, RayResourcePool, RayClassWithInitArgs +from verl.single_controller.ray.base import RayWorkerGroup, RayResourcePool, RayClassWithInitArgs @ray.remote diff --git a/single_controller/ray/megatron.py b/verl/single_controller/ray/megatron.py similarity index 94% rename from 
single_controller/ray/megatron.py rename to verl/single_controller/ray/megatron.py index 3aad741..2cdb49f 100644 --- a/single_controller/ray/megatron.py +++ b/verl/single_controller/ray/megatron.py @@ -17,8 +17,8 @@ import ray from .base import RayWorkerGroup, RayResourcePool, RayClassWithInitArgs -from single_controller.base.megatron.worker import DistRankInfo, DistGlobalInfo -from single_controller.base.megatron.worker_group import MegatronWorkerGroup +from verl.single_controller.base.megatron.worker import DistRankInfo, DistGlobalInfo +from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup # NOTE(sgm): for opensource megatron-core diff --git a/single_controller/version/version b/verl/single_controller/version/version similarity index 100% rename from single_controller/version/version rename to verl/single_controller/version/version diff --git a/verl/trainer/main_generation.py b/verl/trainer/main_generation.py index 0d17073..42469b6 100644 --- a/verl/trainer/main_generation.py +++ b/verl/trainer/main_generation.py @@ -33,7 +33,7 @@ from verl.utils.fs import copy_local_path_from_hdfs from verl.trainer.ppo.workers.fsdp_workers import ActorRolloutRefWorker from verl.utils.hdfs_io import makedirs -from single_controller.ray import RayClassWithInitArgs, RayResourcePool, RayWorkerGroup +from verl.single_controller.ray import RayClassWithInitArgs, RayResourcePool, RayWorkerGroup @hydra.main(config_path='config', config_name='generation', version_base=None) diff --git a/verl/trainer/main_ppo.py b/verl/trainer/main_ppo.py index cbb1c61..3c165b6 100644 --- a/verl/trainer/main_ppo.py +++ b/verl/trainer/main_ppo.py @@ -120,13 +120,13 @@ def main_task(config): if config.actor_rollout_ref.actor.strategy == 'fsdp': assert config.actor_rollout_ref.actor.strategy == config.critic.strategy from verl.trainer.ppo.workers.fsdp_workers import ActorRolloutRefWorker, CriticWorker - from single_controller.ray import RayWorkerGroup + from verl.single_controller.ray import RayWorkerGroup ray_worker_group_cls = RayWorkerGroup elif config.actor_rollout_ref.actor.strategy == 'megatron': assert config.actor_rollout_ref.actor.strategy == config.critic.strategy from verl.trainer.ppo.workers.megatron_workers import ActorRolloutRefWorker, CriticWorker - from single_controller.ray.megatron import NVMegatronRayWorkerGroup + from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup ray_worker_group_cls = NVMegatronRayWorkerGroup else: diff --git a/verl/trainer/ppo/ray_trainer.py b/verl/trainer/ppo/ray_trainer.py index 95814f5..3a2b258 100644 --- a/verl/trainer/ppo/ray_trainer.py +++ b/verl/trainer/ppo/ray_trainer.py @@ -26,9 +26,9 @@ import numpy as np from codetiming import Timer -from single_controller.base import Worker -from single_controller.ray import RayResourcePool, RayWorkerGroup, RayClassWithInitArgs -from single_controller.ray.base import create_colocated_worker_cls +from verl.single_controller.base import Worker +from verl.single_controller.ray import RayResourcePool, RayWorkerGroup, RayClassWithInitArgs +from verl.single_controller.ray.base import create_colocated_worker_cls from verl import DataProto from verl.trainer.ppo import core_algos diff --git a/verl/trainer/ppo/workers/fsdp_workers.py b/verl/trainer/ppo/workers/fsdp_workers.py index 9d47bae..f9a68f1 100644 --- a/verl/trainer/ppo/workers/fsdp_workers.py +++ b/verl/trainer/ppo/workers/fsdp_workers.py @@ -23,8 +23,8 @@ import torch.distributed from omegaconf import DictConfig, open_dict -from single_controller.base 
import Worker -from single_controller.base.decorator import register, Dispatch +from verl.single_controller.base import Worker +from verl.single_controller.base.decorator import register, Dispatch import verl.utils.torch_functional as verl_F from verl import DataProto from verl.trainer.ppo.actor import DataParallelPPOActor @@ -304,7 +304,7 @@ def init_model(self): torch.cuda.empty_cache() - @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO) + @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO, blocking=False) def update_actor(self, data: DataProto): data = data.to('cuda') @@ -592,7 +592,7 @@ def compute_values(self, data: DataProto): torch.cuda.empty_cache() return output - @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO) + @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO, blocking=False) def update_critic(self, data: DataProto): data = data.to('cuda') if self._is_offload_param: diff --git a/verl/trainer/ppo/workers/megatron_workers.py b/verl/trainer/ppo/workers/megatron_workers.py index a481d46..b2ce989 100644 --- a/verl/trainer/ppo/workers/megatron_workers.py +++ b/verl/trainer/ppo/workers/megatron_workers.py @@ -22,13 +22,13 @@ import torch.distributed import torch.nn as nn from omegaconf import DictConfig -from single_controller.base.megatron.worker import MegatronWorker +from verl.single_controller.base.megatron.worker import MegatronWorker from verl.trainer.ppo.actor.megatron_actor import MegatronPPOActor from verl.trainer.ppo.critic.megatron_critic import MegatronPPOCritic from verl.trainer.ppo.hybrid_engine import AllGatherPPModel from verl.trainer.ppo.reward_model.megatron.reward_model import MegatronRewardModel -from single_controller.base.decorator import register, Dispatch +from verl.single_controller.base.decorator import register, Dispatch from verl import DataProto from verl.utils.fs import copy_local_path_from_hdfs from verl.utils.debug import log_gpu_memory_usage
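Apart from the mechanical package move, the one behavioral change in this diff is in `fsdp_workers.py`: `update_actor` and `update_critic` are now registered with `blocking=False`, so a driver call returns immediately with a future-like handle instead of a materialized `DataProto`. The sketch below shows the driver-side consequence under stated assumptions: `actor_rollout_wg`, `critic_wg`, and `batch` are illustrative names not taken from this diff, and the `.get()` resolution assumes a `DataProtoFuture`-style handle (plain Ray object refs would be resolved with `ray.get`).

```python
# Sketch of the driver-side effect of blocking=False (illustrative names).
# Before this diff, each call blocked until all workers finished; now both
# updates can be launched back to back and run concurrently.
critic_output = critic_wg.update_critic(batch)        # returns a future-like handle
actor_output = actor_rollout_wg.update_actor(batch)   # overlaps with the critic update

# Resolve the handles only when the results are actually needed. We assume a
# DataProtoFuture-style .get(); plain Ray object refs would use ray.get().
critic_output = critic_output.get()
actor_output = actor_output.get()
```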