Update

[ghstack-poisoned]
pytorch · Dec 15, 2024 · 72270ae · 72270ae
2 parents d0065b9 + 76486a9
commit 72270ae
Show file tree

Hide file tree

Showing 6 changed files with 6 additions and 1 deletion.
diff --git a/sota-implementations/gail/gail.py b/sota-implementations/gail/gail.py
@@ -276,6 +276,7 @@ def update(data, expert_data, num_network_updates=num_network_updates):
             expert_data = expert_data.to(device)
 
         with timeit("update"):
+            torch.compiler.cudagraph_mark_step_begin()
             metadata = update(data, expert_data)
         d_loss = metadata["dloss"]
         alpha = metadata["alpha"]

diff --git a/sota-implementations/iql/discrete_iql.py b/sota-implementations/iql/discrete_iql.py
@@ -187,6 +187,7 @@ def update(sampled_tensordict):
                         sampled_tensordict = replay_buffer.sample().to(device)
 
                     with timeit("training - update"):
+                        torch.compiler.cudagraph_mark_step_begin()
                         metadata = update(sampled_tensordict)
                     # update priority
                     if prb:

diff --git a/sota-implementations/iql/iql_offline.py b/sota-implementations/iql/iql_offline.py
@@ -141,6 +141,7 @@ def update(data):
             data = data.to(device)
 
         with timeit("update"):
+            torch.compiler.cudagraph_mark_step_begin()
             loss_info = update(data)
 
         # evaluation

diff --git a/sota-implementations/iql/iql_online.py b/sota-implementations/iql/iql_online.py
@@ -181,6 +181,7 @@ def update(sampled_tensordict):
                         # sample from replay buffer
                         sampled_tensordict = replay_buffer.sample().to(device)
                     with timeit("update"):
+                        torch.compiler.cudagraph_mark_step_begin()
                         loss_info = update(sampled_tensordict)
                     # update priority
                     if prb:

diff --git a/sota-implementations/ppo/ppo_atari.py b/sota-implementations/ppo/ppo_atari.py
@@ -238,7 +238,7 @@ def update(batch, num_network_updates):
                     data_buffer.extend(data_reshape)
 
                 for k, batch in enumerate(data_buffer):
-
+                    torch.compiler.cudagraph_mark_step_begin()
                     loss, num_network_updates = update(
                         batch, num_network_updates=num_network_updates
                     )

diff --git a/sota-implementations/ppo/ppo_mujoco.py b/sota-implementations/ppo/ppo_mujoco.py
@@ -227,6 +227,7 @@ def update(batch, num_network_updates):
                     data_buffer.extend(data_reshape)
 
                 for k, batch in enumerate(data_buffer):
+                    torch.compiler.cudagraph_mark_step_begin()
                     loss, num_network_updates = update(
                         batch, num_network_updates=num_network_updates
                     )