Update (base update)

[ghstack-poisoned]
pytorch · Dec 15, 2024 · commit e1b471a
1 parent: 9ea2c12
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 2 deletions.
diff --git a/sota-implementations/gail/gail.py b/sota-implementations/gail/gail.py
@@ -95,6 +95,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
         lmbda=cfg.ppo.loss.gae_lambda,
         value_network=critic,
         average_gae=False,
+        device=device,
     )
 
     loss_module = ClipPPOLoss(

diff --git a/sota-implementations/gail/ppo_utils.py b/sota-implementations/gail/ppo_utils.py
@@ -55,7 +55,7 @@ def make_ppo_models_state(proof_environment, compile, device):
         "low": proof_environment.action_spec_unbatched.space.low.to(device),
         "high": proof_environment.action_spec_unbatched.space.high.to(device),
         "tanh_loc": False,
-        "safe_tanh": not compile,
+        # "safe_tanh": not compile,
     }
 
     # Define policy architecture
@@ -77,7 +77,9 @@ def make_ppo_models_state(proof_environment, compile, device):
     policy_mlp = torch.nn.Sequential(
         policy_mlp,
         AddStateIndependentNormalScale(
-            proof_environment.action_spec_unbatched.shape[-1], scale_lb=1e-8
+            proof_environment.action_spec_unbatched.shape[-1],
+            scale_lb=1e-8,
+            device=device,
         ),
     )
 
@@ -102,6 +104,7 @@ def make_ppo_models_state(proof_environment, compile, device):
         activation_class=torch.nn.Tanh,
         out_features=1,
         num_cells=[64, 64],
+        device=device,
     )
 
     # Initialize value weights