[Quality] IMPALA auto-device

ghstack-source-id: 202e8a48b78cc03277f928cbab696d26253bc0ee
Pull Request resolved: #2654
pytorch · Dec 15, 2024 · 5bb6abd
1 parent 4b65245
commit 5bb6abd
Show file tree

Hide file tree

Showing 8 changed files with 20 additions and 7 deletions.
diff --git a/sota-implementations/impala/config_multi_node_ray.yaml b/sota-implementations/impala/config_multi_node_ray.yaml
@@ -24,7 +24,7 @@ ray_init_config:
   storage: null
 
 # Device for the forward and backward passes
-local_device: "cuda:0"
+local_device:
 
 # Resources assigned to each IMPALA rollout collection worker
 remote_worker_resources:

diff --git a/sota-implementations/impala/config_multi_node_submitit.yaml b/sota-implementations/impala/config_multi_node_submitit.yaml
@@ -3,7 +3,7 @@ env:
   env_name: PongNoFrameskip-v4
 
 # Device for the forward and backward passes
-local_device: "cuda:0"
+local_device:
 
 # SLURM config
 slurm_config:

diff --git a/sota-implementations/impala/config_single_node.yaml b/sota-implementations/impala/config_single_node.yaml
@@ -3,7 +3,7 @@ env:
   env_name: PongNoFrameskip-v4
 
 # Device for the forward and backward passes
-device: "cuda:0"
+device:
 
 # collector
 collector:

diff --git a/sota-implementations/impala/impala_multi_node_ray.py b/sota-implementations/impala/impala_multi_node_ray.py
@@ -32,7 +32,11 @@ def main(cfg: "DictConfig"):  # noqa: F821
     from torchrl.record.loggers import generate_exp_name, get_logger
     from utils import eval_model, make_env, make_ppo_models
 
-    device = torch.device(cfg.local_device)
+    device = cfg.local_device
+    if not device:
+        device = torch.device("cpu" if not torch.cuda.is_available() else "cuda:0")
+    else:
+        device = torch.device(device)
 
     # Correct for frame_skip
     frame_skip = 4

diff --git a/sota-implementations/impala/impala_multi_node_submitit.py b/sota-implementations/impala/impala_multi_node_submitit.py
@@ -34,7 +34,11 @@ def main(cfg: "DictConfig"):  # noqa: F821
     from torchrl.record.loggers import generate_exp_name, get_logger
     from utils import eval_model, make_env, make_ppo_models
 
-    device = torch.device(cfg.local_device)
+    device = cfg.local_device
+    if not device:
+        device = torch.device("cpu" if not torch.cuda.is_available() else "cuda:0")
+    else:
+        device = torch.device(device)
 
     # Correct for frame_skip
     frame_skip = 4

diff --git a/sota-implementations/impala/impala_single_node.py b/sota-implementations/impala/impala_single_node.py
@@ -31,7 +31,11 @@ def main(cfg: "DictConfig"):  # noqa: F821
     from torchrl.record.loggers import generate_exp_name, get_logger
     from utils import eval_model, make_env, make_ppo_models
 
-    device = torch.device(cfg.device)
+    device = cfg.device
+    if not device:
+        device = torch.device("cpu" if not torch.cuda.is_available() else "cuda:0")
+    else:
+        device = torch.device(device)
 
     # Correct for frame_skip
     frame_skip = 4
@@ -55,7 +59,6 @@ def main(cfg: "DictConfig"):  # noqa: F821
 
     # Create models (check utils.py)
     actor, critic = make_ppo_models(cfg.env.env_name)
-    actor, critic = actor.to(device), critic.to(device)
 
     # Create collector
     collector = MultiaSyncDataCollector(

diff --git a/sota-implementations/ppo/ppo_atari.py b/sota-implementations/ppo/ppo_atari.py
@@ -178,6 +178,7 @@ def update(batch, num_network_updates):
         # Update the networks
         optim.step()
         return loss.detach().set("alpha", alpha), num_network_updates.clone()
+
     if cfg.compile.compile:
         update = compile_with_warmup(update, mode=compile_mode, warmup=1)
         adv_module = compile_with_warmup(adv_module, mode=compile_mode, warmup=1)

diff --git a/sota-implementations/ppo/ppo_mujoco.py b/sota-implementations/ppo/ppo_mujoco.py
@@ -163,6 +163,7 @@ def update(batch, num_network_updates):
         # Update the networks
         optim.step()
         return loss.detach().set("alpha", alpha), num_network_updates.clone()
+
     if cfg.compile.compile:
         update = compile_with_warmup(update, mode=compile_mode, warmup=1)
         adv_module = compile_with_warmup(adv_module, mode=compile_mode, warmup=1)