diff --git a/.github/workflows/yapf_format.yml b/.github/workflows/yapf_format.yml index c6d27c3..548df4c 100644 --- a/.github/workflows/yapf_format.yml +++ b/.github/workflows/yapf_format.yml @@ -42,4 +42,4 @@ jobs: pip install toml==0.10.2 - name: Running yapf run: | - yapf -r -vv -d --style=./.style.yapf verl tests single_controller examples + yapf -r -vv -d --style=./.style.yapf verl tests examples diff --git a/README.md b/README.md index faa3d1f..2f96ba3 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,7 @@ pip3 install yapf ``` Then, make sure you are at top level of verl repo and run ```bash -yapf -ir -vv --style ./.style.yapf verl single_controller examples +yapf -ir -vv --style ./.style.yapf verl examples ``` diff --git a/docs/advance/dpo_extension.rst b/docs/advance/dpo_extension.rst index fb7754c..592d971 100644 --- a/docs/advance/dpo_extension.rst +++ b/docs/advance/dpo_extension.rst @@ -47,8 +47,8 @@ Implementation details: .. code:: python - from single_controller.base import Worker - from single_controller.ray import RayWorkerGroup, RayClassWithInitArgs, RayResourcePool + from verl.single_controller.base import Worker + from verl.single_controller.ray import RayWorkerGroup, RayClassWithInitArgs, RayResourcePool import ray @ray.remote @@ -75,7 +75,7 @@ API: compute reference log probability .. code:: python - from single_controller.base import Worker + from verl.single_controller.base import Worker import ray @ray.remote @@ -93,7 +93,7 @@ API: Update actor model parameters .. code:: python - from single_controller.base import Worker + from verl.single_controller.base import Worker import ray @ray.remote @@ -184,7 +184,7 @@ registered into the worker_group** .. code:: python - from single_controller.base.decorator import register + from verl.single_controller.base.decorator import register def dispatch_data(worker_group, data): return data.chunk(worker_group.world_size) @@ -214,11 +214,11 @@ computation, and data collection. Furthermore, the model parallelism size of each model is usually fixed, including dp, tp, pp. So for these common distributed scenarios, we have -pre-implemented specific dispatch and collect methods,in `decorator.py `_, which can be directly used to wrap the computations. +pre-implemented specific dispatch and collect methods, in `decorator.py `_, which can be directly used to wrap the computations. .. 
code:: python - from single_controller.base.decorator import register, Dispatch + from verl.single_controller.base.decorator import register, Dispatch @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO) def generate_sequences(self, data: DataProto) -> DataProto: diff --git a/docs/examples/ppo_code_architecture.rst b/docs/examples/ppo_code_architecture.rst index bd247a2..ab1f66a 100644 --- a/docs/examples/ppo_code_architecture.rst +++ b/docs/examples/ppo_code_architecture.rst @@ -49,13 +49,13 @@ Define worker classes if config.actor_rollout_ref.actor.strategy == 'fsdp': # for FSDP backend assert config.actor_rollout_ref.actor.strategy == config.critic.strategy from verl.trainer.ppo.workers.fsdp_workers import ActorRolloutRefWorker, CriticWorker - from single_controller.ray import RayWorkerGroup + from verl.single_controller.ray import RayWorkerGroup ray_worker_group_cls = RayWorkerGroup elif config.actor_rollout_ref.actor.strategy == 'megatron': # for Megatron backend assert config.actor_rollout_ref.actor.strategy == config.critic.strategy from verl.trainer.ppo.workers.megatron_workers import ActorRolloutRefWorker, CriticWorker - from single_controller.ray.megatron import NVMegatronRayWorkerGroup + from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup ray_worker_group_cls = NVMegatronRayWorkerGroup # Ray worker class for Megatron-LM else: diff --git a/docs/workers/megatron_workers.rst b/docs/workers/megatron_workers.rst index 5a8f5ed..d6f88c3 100644 --- a/docs/workers/megatron_workers.rst +++ b/docs/workers/megatron_workers.rst @@ -40,7 +40,7 @@ We implement various APIs for each ``Worker`` class decorated by the ``@register(dispatch_mode=)`` decorator. These APIs can be called by the ray driver process. The data can be correctly collected and dispatched following the ``dispatch_mode`` of each function. The supported ``dispatch_mode`` values -(i.e., transfer protocols) can be found in `decorator.py `_. +(i.e., transfer protocols) can be found in `decorator.py `_. 
ActorRolloutRefWorker ^^^^^^^^^^^^^^^^^^^^^ diff --git a/examples/ray/tutorial.ipynb b/examples/ray/tutorial.ipynb index 9b8591a..f270cd9 100644 --- a/examples/ray/tutorial.ipynb +++ b/examples/ray/tutorial.ipynb @@ -232,8 +232,8 @@ }, "outputs": [], "source": [ - "from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, merge_resource_pool\n", - "from single_controller.base import Worker" + "from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, merge_resource_pool\n", + "from verl.single_controller.base import Worker" ] }, { @@ -437,7 +437,7 @@ }, "outputs": [], "source": [ - "from single_controller.ray.decorator import register, Dispatch, Execute" + "from verl.single_controller.ray.decorator import register, Dispatch, Execute" ] }, { @@ -518,7 +518,7 @@ }, "outputs": [], "source": [ - "from single_controller.ray.decorator import register, Dispatch, collect_all_to_all, Execute" + "from verl.single_controller.ray.decorator import register, Dispatch, collect_all_to_all, Execute" ] }, { @@ -723,10 +723,10 @@ }, "outputs": [], "source": [ - "from single_controller.ray.decorator import register, Dispatch, Execute\n", - "from single_controller.ray.megatron import NVMegatronRayWorkerGroup\n", - "from single_controller.base.megatron.worker import MegatronWorker\n", - "from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup\n", + "from verl.single_controller.ray.decorator import register, Dispatch, Execute\n", + "from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup\n", + "from verl.single_controller.base.megatron.worker import MegatronWorker\n", + "from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup\n", "from omegaconf import OmegaConf\n", "from megatron.core import parallel_state as mpu" ] diff --git a/examples/split_placement/main_ppo_split.py b/examples/split_placement/main_ppo_split.py index 524f35e..5ae4b21 100644 --- a/examples/split_placement/main_ppo_split.py +++ b/examples/split_placement/main_ppo_split.py @@ -121,13 +121,13 @@ def main_task(config): if config.actor_rollout_ref.actor.strategy == 'fsdp': assert config.actor_rollout_ref.actor.strategy == config.critic.strategy from verl.trainer.ppo.workers.fsdp_workers import ActorRolloutRefWorker, CriticWorker - from single_controller.ray import RayWorkerGroup + from verl.single_controller.ray import RayWorkerGroup ray_worker_group_cls = RayWorkerGroup elif config.actor_rollout_ref.actor.strategy == 'megatron': assert config.actor_rollout_ref.actor.strategy == config.critic.strategy from verl.trainer.ppo.workers.megatron_workers import ActorRolloutRefWorker, CriticWorker - from single_controller.ray.megatron import NVMegatronRayWorkerGroup + from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup ray_worker_group_cls = NVMegatronRayWorkerGroup else: diff --git a/examples/split_placement/split_monkey_patch.py b/examples/split_placement/split_monkey_patch.py index 70ed267..5e09377 100644 --- a/examples/split_placement/split_monkey_patch.py +++ b/examples/split_placement/split_monkey_patch.py @@ -16,7 +16,7 @@ """ import os from pprint import pprint -from single_controller.ray import RayResourcePool, RayWorkerGroup, RayClassWithInitArgs +from verl.single_controller.ray import RayResourcePool, RayWorkerGroup, RayClassWithInitArgs from verl import DataProto from verl.trainer.ppo.ray_trainer import compute_advantage, apply_kl_penalty, 
reduce_metrics, compute_data_metrics, Role, create_colocated_worker_cls from codetiming import Timer diff --git a/tests/ray/check_worker_alive/main.py b/tests/ray/check_worker_alive/main.py index 9526f1b..fcebbfe 100644 --- a/tests/ray/check_worker_alive/main.py +++ b/tests/ray/check_worker_alive/main.py @@ -18,9 +18,9 @@ import ray -from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup -from single_controller.base.worker import Worker -from single_controller.base.decorator import register, Dispatch +from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup +from verl.single_controller.base.worker import Worker +from verl.single_controller.base.decorator import register, Dispatch @ray.remote diff --git a/tests/ray/detached_worker/client.py b/tests/ray/detached_worker/client.py index 0595bcf..1773fff 100644 --- a/tests/ray/detached_worker/client.py +++ b/tests/ray/detached_worker/client.py @@ -19,8 +19,8 @@ import torch from verl import DataProto -from single_controller.ray import RayClassWithInitArgs -from single_controller.ray.megatron import NVMegatronRayWorkerGroup +from verl.single_controller.ray import RayClassWithInitArgs +from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup from tensordict import TensorDict diff --git a/tests/ray/detached_worker/server.py b/tests/ray/detached_worker/server.py index 1842f0a..c8057e3 100644 --- a/tests/ray/detached_worker/server.py +++ b/tests/ray/detached_worker/server.py @@ -25,10 +25,10 @@ from torch import nn import ray -from single_controller.ray import RayClassWithInitArgs, RayResourcePool -from single_controller.ray.megatron import NVMegatronRayWorkerGroup -from single_controller.base.megatron.worker import MegatronWorker -from single_controller.ray.decorator import register, Dispatch +from verl.single_controller.ray import RayClassWithInitArgs, RayResourcePool +from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup +from verl.single_controller.base.megatron.worker import MegatronWorker +from verl.single_controller.ray.decorator import register, Dispatch from verl import DataProto from verl.models.llama.megatron import ParallelLlamaForCausalLMRmPadPP diff --git a/tests/ray/test_colocated_workers.py b/tests/ray/test_colocated_workers.py index 0515400..96b859b 100644 --- a/tests/ray/test_colocated_workers.py +++ b/tests/ray/test_colocated_workers.py @@ -14,9 +14,9 @@ import ray -from single_controller.base import Worker -from single_controller.base.decorator import register, Dispatch -from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, create_colocated_worker_cls +from verl.single_controller.base import Worker +from verl.single_controller.base.decorator import register, Dispatch +from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, create_colocated_worker_cls from verl import DataProto diff --git a/tests/ray/test_data_transfer.py b/tests/ray/test_data_transfer.py index 480e576..46b962c 100644 --- a/tests/ray/test_data_transfer.py +++ b/tests/ray/test_data_transfer.py @@ -15,10 +15,10 @@ In this test, we instantiate a data parallel worker with 8 GPUs """ -from single_controller.base import Worker -from single_controller.ray import RayWorkerGroup, RayClassWithInitArgs, RayResourcePool +from verl.single_controller.base import Worker +from verl.single_controller.ray import RayWorkerGroup, RayClassWithInitArgs, RayResourcePool -from 
single_controller.base.decorator import Dispatch, register +from verl.single_controller.base.decorator import Dispatch, register import ray import torch diff --git a/tests/ray/test_driverfunc_to_worker.py b/tests/ray/test_driverfunc_to_worker.py index 2c7007b..ea253fd 100644 --- a/tests/ray/test_driverfunc_to_worker.py +++ b/tests/ray/test_driverfunc_to_worker.py @@ -18,9 +18,9 @@ from verl import DataProto from tensordict import TensorDict -from single_controller.base.worker import Worker -from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs -from single_controller.ray import RayWorkerGroup +from verl.single_controller.base.worker import Worker +from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs +from verl.single_controller.ray import RayWorkerGroup os.environ['RAY_DEDUP_LOGS'] = '0' os.environ['NCCL_DEBUG'] = 'WARN' diff --git a/tests/ray/test_high_level_scheduling_api.py b/tests/ray/test_high_level_scheduling_api.py index 33d0d14..2d83206 100644 --- a/tests/ray/test_high_level_scheduling_api.py +++ b/tests/ray/test_high_level_scheduling_api.py @@ -16,8 +16,8 @@ import ray -from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, merge_resource_pool -from single_controller.base.worker import Worker +from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, merge_resource_pool +from verl.single_controller.base.worker import Worker @ray.remote diff --git a/tests/ray/test_ray_local_envs.py b/tests/ray/test_ray_local_envs.py index 53bf850..542d536 100644 --- a/tests/ray/test_ray_local_envs.py +++ b/tests/ray/test_ray_local_envs.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -e2e test single_controller.ray +e2e test verl.single_controller.ray """ import os import ray -from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup -from single_controller.base.worker import Worker -from single_controller.ray.decorator import register, Dispatch, collect_all_to_all, Execute +from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup +from verl.single_controller.base.worker import Worker +from verl.single_controller.ray.decorator import register, Dispatch, collect_all_to_all, Execute @ray.remote diff --git a/tests/ray/test_remote_api.py b/tests/ray/test_remote_api.py index aa4c1c1..b7a64b6 100644 --- a/tests/ray/test_remote_api.py +++ b/tests/ray/test_remote_api.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from single_controller.remote import remote, RemoteBackend, SharedResourcePool -from single_controller.base.decorator import register, Dispatch -from single_controller.base.worker import Worker +from verl.single_controller.remote import remote, RemoteBackend, SharedResourcePool +from verl.single_controller.base.decorator import register, Dispatch +from verl.single_controller.base.worker import Worker @remote(process_on_nodes=[3], use_gpu=True, name_prefix="actor", sharing=SharedResourcePool) diff --git a/tests/ray/test_worker_group_basics.py b/tests/ray/test_worker_group_basics.py index ee1ef10..fa18e9b 100644 --- a/tests/ray/test_worker_group_basics.py +++ b/tests/ray/test_worker_group_basics.py @@ -12,15 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
""" -e2e test single_controller.ray +e2e test verl.single_controller.ray """ import torch import ray -from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup -from single_controller.base.worker import Worker -from single_controller.ray.decorator import register, Dispatch, collect_all_to_all, Execute +from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup +from verl.single_controller.base.worker import Worker +from verl.single_controller.ray.decorator import register, Dispatch, collect_all_to_all, Execute def two_to_all_dispatch_fn(worker_group, *args, **kwargs): diff --git a/tests/ray/test_worker_group_torch.py b/tests/ray/test_worker_group_torch.py index c48ed1e..13508ed 100644 --- a/tests/ray/test_worker_group_torch.py +++ b/tests/ray/test_worker_group_torch.py @@ -21,8 +21,8 @@ import torch.distributed import ray -from single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup -from single_controller.base.worker import Worker +from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup +from verl.single_controller.base.worker import Worker @ray.remote diff --git a/single_controller/__init__.py b/verl/single_controller/__init__.py similarity index 100% rename from single_controller/__init__.py rename to verl/single_controller/__init__.py diff --git a/single_controller/base/__init__.py b/verl/single_controller/base/__init__.py similarity index 100% rename from single_controller/base/__init__.py rename to verl/single_controller/base/__init__.py diff --git a/single_controller/base/decorator.py b/verl/single_controller/base/decorator.py similarity index 93% rename from single_controller/base/decorator.py rename to verl/single_controller/base/decorator.py index 9544ac6..6fdacb6 100644 --- a/single_controller/base/decorator.py +++ b/verl/single_controller/base/decorator.py @@ -75,7 +75,7 @@ def dispatch_megatron_compute(worker_group, *args, **kwargs): """ User passes in dp data. The data is dispatched to all tp/pp ranks with the same dp """ - from single_controller.base.megatron.worker_group import MegatronWorkerGroup + from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup assert isinstance(worker_group, MegatronWorkerGroup), f'worker_group must be MegatronWorkerGroup, Got {type(worker_group)}' @@ -104,7 +104,7 @@ def collect_megatron_compute(worker_group, output): """ Only collect the data from the tp=0 and pp=last and every dp ranks """ - from single_controller.base.megatron.worker_group import MegatronWorkerGroup + from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup assert isinstance(worker_group, MegatronWorkerGroup) output_in_dp = [] pp_size = worker_group.get_megatron_global_info().pp_size @@ -119,7 +119,7 @@ def dispatch_megatron_compute_data_proto(worker_group, *args, **kwargs): """ All the args and kwargs must be DataProto. The batch will be chunked by dp_size and passed to each rank """ - from single_controller.base.megatron.worker_group import MegatronWorkerGroup + from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup assert isinstance(worker_group, MegatronWorkerGroup) splitted_args, splitted_kwargs = _split_args_kwargs_data_proto(worker_group.dp_size, *args, **kwargs) @@ -162,7 +162,7 @@ def dispatch_megatron_pp_as_dp(worker_group, *args, **kwargs): """ treat pp as dp. 
""" - from single_controller.base.megatron.worker_group import MegatronWorkerGroup + from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup assert isinstance(worker_group, MegatronWorkerGroup) pp_size = worker_group.pp_size @@ -210,7 +210,7 @@ def collect_megatron_pp_as_dp(worker_group, output): """ treat pp as dp. Only collect data on tp=0 """ - from single_controller.base.megatron.worker_group import MegatronWorkerGroup + from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup assert isinstance(worker_group, MegatronWorkerGroup) output_in_dp = [] for global_rank in range(worker_group.world_size): @@ -224,7 +224,7 @@ def collect_megatron_pp_only(worker_group, output): """ Only collect output of megatron pp. This is useful when examine weight names as they are identical in tp/dp """ - from single_controller.base.megatron.worker_group import MegatronWorkerGroup + from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup assert isinstance(worker_group, MegatronWorkerGroup) output_in_pp = [] for global_rank in range(worker_group.world_size): @@ -235,7 +235,7 @@ def collect_megatron_pp_only(worker_group, output): def dispatch_megatron_pp_as_dp_data_proto(worker_group, *args, **kwargs): - from single_controller.base.megatron.worker_group import MegatronWorkerGroup + from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup assert isinstance(worker_group, MegatronWorkerGroup) pp_dp_size = worker_group.dp_size * worker_group.pp_size @@ -245,7 +245,7 @@ def dispatch_megatron_pp_as_dp_data_proto(worker_group, *args, **kwargs): def collect_megatron_pp_as_dp_data_proto(worker_group, output): from verl.protocol import DataProto - from single_controller.base.megatron.worker_group import MegatronWorkerGroup + from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup assert isinstance(worker_group, MegatronWorkerGroup) output = collect_megatron_pp_as_dp(worker_group, output) @@ -253,7 +253,7 @@ def collect_megatron_pp_as_dp_data_proto(worker_group, output): def dispatch_dp_compute(worker_group, *args, **kwargs): - from single_controller.base.worker_group import WorkerGroup + from verl.single_controller.base.worker_group import WorkerGroup assert isinstance(worker_group, WorkerGroup) for arg in args: assert isinstance(arg, (Tuple, List)) and len(arg) == worker_group.world_size @@ -263,21 +263,21 @@ def dispatch_dp_compute(worker_group, *args, **kwargs): def collect_dp_compute(worker_group, output): - from single_controller.base.worker_group import WorkerGroup + from verl.single_controller.base.worker_group import WorkerGroup assert isinstance(worker_group, WorkerGroup) assert len(output) == worker_group.world_size return output def dispatch_dp_compute_data_proto(worker_group, *args, **kwargs): - from single_controller.base.worker_group import WorkerGroup + from verl.single_controller.base.worker_group import WorkerGroup assert isinstance(worker_group, WorkerGroup) splitted_args, splitted_kwargs = _split_args_kwargs_data_proto(worker_group.world_size, *args, **kwargs) return splitted_args, splitted_kwargs def dispatch_dp_compute_data_proto_with_func(worker_group, *args, **kwargs): - from single_controller.base.worker_group import WorkerGroup + from verl.single_controller.base.worker_group import WorkerGroup assert isinstance(worker_group, WorkerGroup) assert type(args[0]) == FunctionType # NOTE: The first one args is a function! 
diff --git a/single_controller/base/dp.py b/verl/single_controller/base/dp.py similarity index 96% rename from single_controller/base/dp.py rename to verl/single_controller/base/dp.py index 5d534e8..2d19188 100644 --- a/single_controller/base/dp.py +++ b/verl/single_controller/base/dp.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from single_controller.base.worker import Worker +from verl.single_controller.base.worker import Worker class DPEngineWorker(Worker): diff --git a/single_controller/base/megatron/__init__.py b/verl/single_controller/base/megatron/__init__.py similarity index 100% rename from single_controller/base/megatron/__init__.py rename to verl/single_controller/base/megatron/__init__.py diff --git a/single_controller/base/megatron/worker.py b/verl/single_controller/base/megatron/worker.py similarity index 94% rename from single_controller/base/megatron/worker.py rename to verl/single_controller/base/megatron/worker.py index 46608bb..2d84d29 100644 --- a/single_controller/base/megatron/worker.py +++ b/verl/single_controller/base/megatron/worker.py @@ -14,7 +14,7 @@ import os from dataclasses import dataclass -from single_controller.base.worker import Worker, DistRankInfo, DistGlobalInfo +from verl.single_controller.base.worker import Worker, DistRankInfo, DistGlobalInfo class MegatronWorker(Worker): diff --git a/single_controller/base/megatron/worker_group.py b/verl/single_controller/base/megatron/worker_group.py similarity index 96% rename from single_controller/base/megatron/worker_group.py rename to verl/single_controller/base/megatron/worker_group.py index 59a78ff..67c21d3 100644 --- a/single_controller/base/megatron/worker_group.py +++ b/verl/single_controller/base/megatron/worker_group.py @@ -15,7 +15,7 @@ from typing import Dict from .worker import DistRankInfo, DistGlobalInfo -from single_controller.base import ResourcePool, WorkerGroup +from verl.single_controller.base import ResourcePool, WorkerGroup class MegatronWorkerGroup(WorkerGroup): diff --git a/single_controller/base/register_center/__init__.py b/verl/single_controller/base/register_center/__init__.py similarity index 100% rename from single_controller/base/register_center/__init__.py rename to verl/single_controller/base/register_center/__init__.py diff --git a/single_controller/base/register_center/ray.py b/verl/single_controller/base/register_center/ray.py similarity index 100% rename from single_controller/base/register_center/ray.py rename to verl/single_controller/base/register_center/ray.py diff --git a/single_controller/base/worker.py b/verl/single_controller/base/worker.py similarity index 95% rename from single_controller/base/worker.py rename to verl/single_controller/base/worker.py index efd23a8..2ca961d 100644 --- a/single_controller/base/worker.py +++ b/verl/single_controller/base/worker.py @@ -17,7 +17,7 @@ import os import socket from dataclasses import dataclass -from single_controller.base.decorator import register, Dispatch +from verl.single_controller.base.decorator import register, Dispatch @dataclass @@ -43,7 +43,7 @@ def get_node_ip_by_sdk(): import ray return ray._private.services.get_node_ip_address() elif os.getenv("WG_BACKEND", None) == "torch_rpc": - from single_controller.torchrpc.k8s_client import get_ip_addr + from verl.single_controller.torchrpc.k8s_client import get_ip_addr return get_ip_addr() return None @@ -110,7 +110,7 @@ def _configure_before_init(self, register_center_name: str, rank: int): } 
if os.getenv("WG_BACKEND", None) == "ray": - from single_controller.base.register_center.ray import create_worker_group_register_center + from verl.single_controller.base.register_center.ray import create_worker_group_register_center self.register_center = create_worker_group_register_center(name=register_center_name, info=rank_zero_info) diff --git a/single_controller/base/worker_group.py b/verl/single_controller/base/worker_group.py similarity index 98% rename from single_controller/base/worker_group.py rename to verl/single_controller/base/worker_group.py index a6bc927..bd58458 100644 --- a/single_controller/base/worker_group.py +++ b/verl/single_controller/base/worker_group.py @@ -20,7 +20,7 @@ import time from typing import List, Any, Callable, Dict -from single_controller.base.decorator import MAGIC_ATTR, Dispatch, get_predefined_dispatch_fn, get_predefined_execute_fn +from verl.single_controller.base.decorator import MAGIC_ATTR, Dispatch, get_predefined_dispatch_fn, get_predefined_execute_fn class ResourcePool: diff --git a/single_controller/ray/__init__.py b/verl/single_controller/ray/__init__.py similarity index 100% rename from single_controller/ray/__init__.py rename to verl/single_controller/ray/__init__.py diff --git a/single_controller/ray/base.py b/verl/single_controller/ray/base.py similarity index 99% rename from single_controller/ray/base.py rename to verl/single_controller/ray/base.py index 2cb8148..eaa1b00 100644 --- a/single_controller/ray/base.py +++ b/verl/single_controller/ray/base.py @@ -21,7 +21,7 @@ from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy, NodeAffinitySchedulingStrategy from ray.experimental.state.api import get_actor -from single_controller.base import WorkerGroup, ResourcePool, ClassWithInitArgs, Worker +from verl.single_controller.base import WorkerGroup, ResourcePool, ClassWithInitArgs, Worker __all__ = ['Worker'] @@ -373,7 +373,7 @@ def world_size(self): """ from unittest.mock import patch -from single_controller.base.decorator import MAGIC_ATTR +from verl.single_controller.base.decorator import MAGIC_ATTR import os diff --git a/single_controller/ray/decorator.py b/verl/single_controller/ray/decorator.py similarity index 97% rename from single_controller/ray/decorator.py rename to verl/single_controller/ray/decorator.py index 006a80c..1de452f 100644 --- a/single_controller/ray/decorator.py +++ b/verl/single_controller/ray/decorator.py @@ -19,7 +19,7 @@ import ray # compatiblity cern -from single_controller.base.decorator import * +from verl.single_controller.base.decorator import * def maybe_remote(main): diff --git a/single_controller/ray/dist_data_pass_protocol.py b/verl/single_controller/ray/dist_data_pass_protocol.py similarity index 100% rename from single_controller/ray/dist_data_pass_protocol.py rename to verl/single_controller/ray/dist_data_pass_protocol.py diff --git a/single_controller/ray/dp.py b/verl/single_controller/ray/dp.py similarity index 97% rename from single_controller/ray/dp.py rename to verl/single_controller/ray/dp.py index fab4da9..b53d4b9 100644 --- a/single_controller/ray/dp.py +++ b/verl/single_controller/ray/dp.py @@ -14,7 +14,7 @@ import ray -from single_controller.ray.base import RayWorkerGroup, RayResourcePool, RayClassWithInitArgs +from verl.single_controller.ray.base import RayWorkerGroup, RayResourcePool, RayClassWithInitArgs @ray.remote diff --git a/single_controller/ray/megatron.py b/verl/single_controller/ray/megatron.py similarity index 94% rename from 
single_controller/ray/megatron.py rename to verl/single_controller/ray/megatron.py index 3aad741..2cdb49f 100644 --- a/single_controller/ray/megatron.py +++ b/verl/single_controller/ray/megatron.py @@ -17,8 +17,8 @@ import ray from .base import RayWorkerGroup, RayResourcePool, RayClassWithInitArgs -from single_controller.base.megatron.worker import DistRankInfo, DistGlobalInfo -from single_controller.base.megatron.worker_group import MegatronWorkerGroup +from verl.single_controller.base.megatron.worker import DistRankInfo, DistGlobalInfo +from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup # NOTE(sgm): for opensource megatron-core diff --git a/single_controller/version/version b/verl/single_controller/version/version similarity index 100% rename from single_controller/version/version rename to verl/single_controller/version/version diff --git a/verl/trainer/main_generation.py b/verl/trainer/main_generation.py index 0d17073..42469b6 100644 --- a/verl/trainer/main_generation.py +++ b/verl/trainer/main_generation.py @@ -33,7 +33,7 @@ from verl.utils.fs import copy_local_path_from_hdfs from verl.trainer.ppo.workers.fsdp_workers import ActorRolloutRefWorker from verl.utils.hdfs_io import makedirs -from single_controller.ray import RayClassWithInitArgs, RayResourcePool, RayWorkerGroup +from verl.single_controller.ray import RayClassWithInitArgs, RayResourcePool, RayWorkerGroup @hydra.main(config_path='config', config_name='generation', version_base=None) diff --git a/verl/trainer/main_ppo.py b/verl/trainer/main_ppo.py index cbb1c61..3c165b6 100644 --- a/verl/trainer/main_ppo.py +++ b/verl/trainer/main_ppo.py @@ -120,13 +120,13 @@ def main_task(config): if config.actor_rollout_ref.actor.strategy == 'fsdp': assert config.actor_rollout_ref.actor.strategy == config.critic.strategy from verl.trainer.ppo.workers.fsdp_workers import ActorRolloutRefWorker, CriticWorker - from single_controller.ray import RayWorkerGroup + from verl.single_controller.ray import RayWorkerGroup ray_worker_group_cls = RayWorkerGroup elif config.actor_rollout_ref.actor.strategy == 'megatron': assert config.actor_rollout_ref.actor.strategy == config.critic.strategy from verl.trainer.ppo.workers.megatron_workers import ActorRolloutRefWorker, CriticWorker - from single_controller.ray.megatron import NVMegatronRayWorkerGroup + from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup ray_worker_group_cls = NVMegatronRayWorkerGroup else: diff --git a/verl/trainer/ppo/ray_trainer.py b/verl/trainer/ppo/ray_trainer.py index 95814f5..3a2b258 100644 --- a/verl/trainer/ppo/ray_trainer.py +++ b/verl/trainer/ppo/ray_trainer.py @@ -26,9 +26,9 @@ import numpy as np from codetiming import Timer -from single_controller.base import Worker -from single_controller.ray import RayResourcePool, RayWorkerGroup, RayClassWithInitArgs -from single_controller.ray.base import create_colocated_worker_cls +from verl.single_controller.base import Worker +from verl.single_controller.ray import RayResourcePool, RayWorkerGroup, RayClassWithInitArgs +from verl.single_controller.ray.base import create_colocated_worker_cls from verl import DataProto from verl.trainer.ppo import core_algos diff --git a/verl/trainer/ppo/workers/fsdp_workers.py b/verl/trainer/ppo/workers/fsdp_workers.py index 9d47bae..f9a68f1 100644 --- a/verl/trainer/ppo/workers/fsdp_workers.py +++ b/verl/trainer/ppo/workers/fsdp_workers.py @@ -23,8 +23,8 @@ import torch.distributed from omegaconf import DictConfig, open_dict -from single_controller.base 
import Worker -from single_controller.base.decorator import register, Dispatch +from verl.single_controller.base import Worker +from verl.single_controller.base.decorator import register, Dispatch import verl.utils.torch_functional as verl_F from verl import DataProto from verl.trainer.ppo.actor import DataParallelPPOActor @@ -304,7 +304,7 @@ def init_model(self): torch.cuda.empty_cache() - @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO) + @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO, blocking=False) def update_actor(self, data: DataProto): data = data.to('cuda') @@ -592,7 +592,7 @@ def compute_values(self, data: DataProto): torch.cuda.empty_cache() return output - @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO) + @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO, blocking=False) def update_critic(self, data: DataProto): data = data.to('cuda') if self._is_offload_param: diff --git a/verl/trainer/ppo/workers/megatron_workers.py b/verl/trainer/ppo/workers/megatron_workers.py index a481d46..b2ce989 100644 --- a/verl/trainer/ppo/workers/megatron_workers.py +++ b/verl/trainer/ppo/workers/megatron_workers.py @@ -22,13 +22,13 @@ import torch.distributed import torch.nn as nn from omegaconf import DictConfig -from single_controller.base.megatron.worker import MegatronWorker +from verl.single_controller.base.megatron.worker import MegatronWorker from verl.trainer.ppo.actor.megatron_actor import MegatronPPOActor from verl.trainer.ppo.critic.megatron_critic import MegatronPPOCritic from verl.trainer.ppo.hybrid_engine import AllGatherPPModel from verl.trainer.ppo.reward_model.megatron.reward_model import MegatronRewardModel -from single_controller.base.decorator import register, Dispatch +from verl.single_controller.base.decorator import register, Dispatch from verl import DataProto from verl.utils.fs import copy_local_path_from_hdfs from verl.utils.debug import log_gpu_memory_usage
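Apart from the mechanical package move, the one behavioral change in this diff is in `fsdp_workers.py`: `update_actor` and `update_critic` are now registered with `blocking=False`, so a driver call returns immediately with a future-like handle instead of a materialized `DataProto`. The sketch below shows the driver-side consequence under stated assumptions: `actor_rollout_wg`, `critic_wg`, and `batch` are illustrative names not taken from this diff, and the `.get()` resolution assumes a `DataProtoFuture`-style handle (plain Ray object refs would be resolved with `ray.get`).

```python
# Sketch of the driver-side effect of blocking=False (illustrative names).
# Before this diff, each call blocked until all workers finished; now both
# updates can be launched back to back and run concurrently.
critic_output = critic_wg.update_critic(batch)        # returns a future-like handle
actor_output = actor_rollout_wg.update_actor(batch)   # overlaps with the critic update

# Resolve the handles only when the results are actually needed. We assume a
# DataProtoFuture-style .get(); plain Ray object refs would use ray.get().
critic_output = critic_output.get()
actor_output = actor_output.get()
```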