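Update: compile collector policies via compile_with_warmup (warmup=1 in the PPO examples) and make the num_network_updates increment out-of-place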

[ghstack-poisoned]
pytorch · Dec 15, 2024 · f872d5c
2 parents 16d934c + b776b63
commit f872d5c
Show file tree

Hide file tree

Showing 3 changed files with 8 additions and 6 deletions.
diff --git a/sota-implementations/ppo/ppo_atari.py b/sota-implementations/ppo/ppo_atari.py
@@ -74,7 +74,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
         device=device,
         storing_device=device,
         max_frames_per_traj=-1,
-        compile_policy={"mode": compile_mode} if compile_mode else False,
+        compile_policy={"mode": compile_mode, "warmup": 1} if compile_mode else False,
         cudagraph_policy=cfg.compile.cudagraphs,
     )
 
@@ -166,7 +166,7 @@ def update(batch, num_network_updates):
                 group["lr"] = cfg_optim_lr * alpha
         if cfg_loss_anneal_clip_eps:
             loss_module.clip_epsilon.copy_(cfg_loss_clip_epsilon * alpha)
-        num_network_updates += 1
+        num_network_updates = num_network_updates + 1
         # Get a data batch
         batch = batch.to(device, non_blocking=True)
 

diff --git a/sota-implementations/ppo/ppo_mujoco.py b/sota-implementations/ppo/ppo_mujoco.py
@@ -74,7 +74,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
         device=device,
         storing_device=device,
         max_frames_per_traj=-1,
-        compile_policy={"mode": compile_mode} if compile_mode else False,
+        compile_policy={"mode": compile_mode, "warmup": 1} if compile_mode else False,
         cudagraph_policy=cfg.compile.cudagraphs,
     )
 
@@ -153,7 +153,7 @@ def update(batch, num_network_updates):
                 group["lr"] = cfg_optim_lr * alpha
         if cfg_loss_anneal_clip_eps:
             loss_module.clip_epsilon.copy_(cfg_loss_clip_epsilon * alpha)
-        num_network_updates += 1
+        num_network_updates = num_network_updates + 1
 
         # Forward pass PPO loss
         loss = loss_module(batch)

diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py
@@ -47,6 +47,7 @@
     _ProcessNoWarn,
     _replace_last,
     accept_remote_rref_udf_invocation,
+    compile_with_warmup,
     logger as torchrl_logger,
     prod,
     RL_WARNINGS,
@@ -67,7 +68,6 @@
     set_exploration_type,
 )
 
-
 try:
     from torch.compiler import cudagraph_mark_step_begin
 except ImportError:
@@ -661,7 +661,9 @@ def __init__(
             self.policy_weights = TensorDict()
 
         if self.compiled_policy:
-            self.policy = torch.compile(self.policy, **self.compiled_policy_kwargs)
+            self.policy = compile_with_warmup(
+                self.policy, **self.compiled_policy_kwargs
+            )
         if self.cudagraphed_policy:
             self.policy = CudaGraphModule(self.policy, **self.cudagraphed_policy_kwargs)