Merge branch 'main' into pettingzoo_tuto

# Conflicts:
#	torchrl/envs/libs/pettingzoo.py
pytorch · Apr 9, 2024 · c744307 · c744307
2 parents 285a1d1 + 79e2b07
commit c744307
Show file tree

Hide file tree

Showing 12 changed files with 449 additions and 57 deletions.
diff --git a/.github/unittest/linux_libs/scripts_pettingzoo/environment.yml b/.github/unittest/linux_libs/scripts_pettingzoo/environment.yml
@@ -8,7 +8,6 @@ dependencies:
     - cloudpickle
     - gym
     - gym-notices
-    - importlib-metadata
     - six
     - zipp
     - pytest

diff --git a/.github/unittest/linux_libs/scripts_robohive/setup_env.sh b/.github/unittest/linux_libs/scripts_robohive/setup_env.sh
@@ -76,4 +76,6 @@ pip install pip --upgrade
 
 conda env update --file "${this_dir}/environment.yml" --prune
 
+conda install conda-forge::ffmpeg -y
+
 pip install git+https://github.com/vikashplus/robohive@main
diff --git a/.github/unittest/linux_libs/scripts_smacv2/environment.yml b/.github/unittest/linux_libs/scripts_smacv2/environment.yml
@@ -7,7 +7,6 @@ dependencies:
     - cloudpickle
     - gym
     - gym-notices
-    - importlib-metadata
     - zipp
     - pytest
     - pytest-cov

diff --git a/.github/unittest/linux_libs/scripts_vmas/environment.yml b/.github/unittest/linux_libs/scripts_vmas/environment.yml
@@ -7,7 +7,6 @@ dependencies:
     - cloudpickle
     - gym
     - gym-notices
-    - importlib-metadata
     - numpy
     - pyglet==1.5.27
     - six

diff --git a/test/test_libs.py b/test/test_libs.py
@@ -278,7 +278,6 @@ def _make_spec(  # noqa: F811
 
     @pytest.mark.parametrize("categorical", [True, False])
     def test_gym_spec_cast(self, categorical):
-
         batch_size = [3, 4]
         cat = DiscreteTensorSpec if categorical else OneHotDiscreteTensorSpec
         cat_shape = batch_size if categorical else (*batch_size, 5)
@@ -543,7 +542,6 @@ def test_torchrl_to_gym(self, backend, numpy):
         ],
     )
     def test_gym(self, env_name, frame_skip, from_pixels, pixels_only):
-
         if env_name == PONG_VERSIONED() and not from_pixels:
             # raise pytest.skip("already pixel")
             # we don't skip because that would raise an exception
@@ -3126,7 +3124,6 @@ class TestPettingZoo:
     def test_pistonball(
         self, parallel, continuous_actions, use_mask, return_state, group_map
     ):
-
         kwargs = {"n_pistons": 21, "continuous": continuous_actions}
 
         env = PettingZooEnv(
@@ -3141,6 +3138,60 @@ def test_pistonball(
 
         check_env_specs(env)
 
+    def test_dead_agents_done(self, seed=0):
+        scenario_args = {"n_walkers": 3, "terminate_on_fall": False}
+
+        env = PettingZooEnv(
+            task="multiwalker_v9",
+            parallel=True,
+            seed=seed,
+            use_mask=False,
+            done_on_any=False,
+            **scenario_args,
+        )
+        td_reset = env.reset(seed=seed)
+        with pytest.raises(
+            ValueError,
+            match="Dead agents found in the environment, "
+            "you need to set use_mask=True to allow this.",
+        ):
+            env.rollout(
+                max_steps=500,
+                break_when_any_done=True,  # This looks at root done set with done_on_any
+                auto_reset=False,
+                tensordict=td_reset,
+            )
+
+        for done_on_any in [True, False]:
+            env = PettingZooEnv(
+                task="multiwalker_v9",
+                parallel=True,
+                seed=seed,
+                use_mask=True,
+                done_on_any=done_on_any,
+                **scenario_args,
+            )
+            td_reset = env.reset(seed=seed)
+            td = env.rollout(
+                max_steps=500,
+                break_when_any_done=True,  # This looks at root done set with done_on_any
+                auto_reset=False,
+                tensordict=td_reset,
+            )
+            done = td.get(("next", "walker", "done"))
+            mask = td.get(("next", "walker", "mask"))
+
+            if done_on_any:
+                assert not done[-1].all()  # Done triggered on any
+            else:
+                assert done[-1].all()  # Done triggered on all
+            assert not done[
+                mask
+            ].any()  # When mask is true (alive agent), all agents are not done
+            assert done[
+                ~mask
+            ].all()  # When mask is false (dead agent), all agents are done
+
     @pytest.mark.parametrize(
         "wins_player_0",
         [True, False],
@@ -3156,7 +3207,6 @@ def test_tic_tac_toe(self, wins_player_0):
         )
 
         class Policy:
-
             action = 0
             t = 0
 

diff --git a/test/test_transforms.py b/test/test_transforms.py
@@ -9331,7 +9331,239 @@ def test_transform_inverse(self):
         return
 
 
-class TestDeviceCastTransform(TransformBase):
+class TestDeviceCastTransformPart(TransformBase):
+    @pytest.mark.parametrize("in_keys", ["observation"])
+    @pytest.mark.parametrize("out_keys", [None, ["obs_device"]])
+    @pytest.mark.parametrize("in_keys_inv", ["action"])
+    @pytest.mark.parametrize("out_keys_inv", [None, ["action_device"]])
+    def test_single_trans_env_check(self, in_keys, out_keys, in_keys_inv, out_keys_inv):
+        env = ContinuousActionVecMockEnv(device="cpu:0")
+        env = TransformedEnv(
+            env,
+            DeviceCastTransform(
+                "cpu:1",
+                in_keys=in_keys,
+                out_keys=out_keys,
+                in_keys_inv=in_keys_inv,
+                out_keys_inv=out_keys_inv,
+            ),
+        )
+        assert env.device is None
+        check_env_specs(env)
+
+    @pytest.mark.parametrize("in_keys", ["observation"])
+    @pytest.mark.parametrize("out_keys", [None, ["obs_device"]])
+    @pytest.mark.parametrize("in_keys_inv", ["action"])
+    @pytest.mark.parametrize("out_keys_inv", [None, ["action_device"]])
+    def test_serial_trans_env_check(self, in_keys, out_keys, in_keys_inv, out_keys_inv):
+        def make_env():
+            return TransformedEnv(
+                ContinuousActionVecMockEnv(device="cpu:0"),
+                DeviceCastTransform(
+                    "cpu:1",
+                    in_keys=in_keys,
+                    out_keys=out_keys,
+                    in_keys_inv=in_keys_inv,
+                    out_keys_inv=out_keys_inv,
+                ),
+            )
+
+        env = SerialEnv(2, make_env)
+        assert env.device is None
+        check_env_specs(env)
+
+    @pytest.mark.parametrize("in_keys", ["observation"])
+    @pytest.mark.parametrize("out_keys", [None, ["obs_device"]])
+    @pytest.mark.parametrize("in_keys_inv", ["action"])
+    @pytest.mark.parametrize("out_keys_inv", [None, ["action_device"]])
+    def test_parallel_trans_env_check(
+        self, in_keys, out_keys, in_keys_inv, out_keys_inv
+    ):
+        def make_env():
+            return TransformedEnv(
+                ContinuousActionVecMockEnv(device="cpu:0"),
+                DeviceCastTransform(
+                    "cpu:1",
+                    in_keys=in_keys,
+                    out_keys=out_keys,
+                    in_keys_inv=in_keys_inv,
+                    out_keys_inv=out_keys_inv,
+                ),
+            )
+
+        env = ParallelEnv(
+            2,
+            make_env,
+            mp_start_method="fork" if not torch.cuda.is_available() else "spawn",
+        )
+        assert env.device is None
+        try:
+            check_env_specs(env)
+        finally:
+            env.close()
+
+    @pytest.mark.parametrize("in_keys", ["observation"])
+    @pytest.mark.parametrize("out_keys", [None, ["obs_device"]])
+    @pytest.mark.parametrize("in_keys_inv", ["action"])
+    @pytest.mark.parametrize("out_keys_inv", [None, ["action_device"]])
+    def test_trans_serial_env_check(self, in_keys, out_keys, in_keys_inv, out_keys_inv):
+        def make_env():
+            return ContinuousActionVecMockEnv(device="cpu:0")
+
+        env = TransformedEnv(
+            SerialEnv(2, make_env),
+            DeviceCastTransform(
+                "cpu:1",
+                in_keys=in_keys,
+                out_keys=out_keys,
+                in_keys_inv=in_keys_inv,
+                out_keys_inv=out_keys_inv,
+            ),
+        )
+        assert env.device is None
+        check_env_specs(env)
+
+    @pytest.mark.parametrize("in_keys", ["observation"])
+    @pytest.mark.parametrize("out_keys", [None, ["obs_device"]])
+    @pytest.mark.parametrize("in_keys_inv", ["action"])
+    @pytest.mark.parametrize("out_keys_inv", [None, ["action_device"]])
+    def test_trans_parallel_env_check(
+        self, in_keys, out_keys, in_keys_inv, out_keys_inv
+    ):
+        def make_env():
+            return ContinuousActionVecMockEnv(device="cpu:0")
+
+        env = TransformedEnv(
+            ParallelEnv(
+                2,
+                make_env,
+                mp_start_method="fork" if not torch.cuda.is_available() else "spawn",
+            ),
+            DeviceCastTransform(
+                "cpu:1",
+                in_keys=in_keys,
+                out_keys=out_keys,
+                in_keys_inv=in_keys_inv,
+                out_keys_inv=out_keys_inv,
+            ),
+        )
+        assert env.device is None
+        try:
+            check_env_specs(env)
+        finally:
+            env.close()
+
+    def test_transform_no_env(self):
+        t = DeviceCastTransform("cpu:1", "cpu:0", in_keys=["a"], out_keys=["b"])
+        td = TensorDict({"a": torch.randn((), device="cpu:0")}, [], device="cpu:0")
+        tdt = t._call(td)
+        assert tdt.device is None
+
+    @pytest.mark.parametrize("in_keys", ["observation"])
+    @pytest.mark.parametrize("out_keys", [None, ["obs_device"]])
+    @pytest.mark.parametrize("in_keys_inv", ["action"])
+    @pytest.mark.parametrize("out_keys_inv", [None, ["action_device"]])
+    def test_transform_env(self, in_keys, out_keys, in_keys_inv, out_keys_inv):
+        env = ContinuousActionVecMockEnv(device="cpu:0")
+        env = TransformedEnv(
+            env,
+            DeviceCastTransform(
+                "cpu:1",
+                in_keys=in_keys,
+                out_keys=out_keys,
+                in_keys_inv=in_keys_inv,
+                out_keys_inv=out_keys_inv,
+            ),
+        )
+        assert env.device is None
+        assert env.transform.device == torch.device("cpu:1")
+        assert env.transform.orig_device == torch.device("cpu:0")
+
+    def test_transform_compose(self):
+        t = Compose(
+            DeviceCastTransform(
+                "cpu:1",
+                "cpu:0",
+                in_keys=["a"],
+                out_keys=["b"],
+                in_keys_inv=["c"],
+                out_keys_inv=["d"],
+            )
+        )
+
+        td = TensorDict(
+            {
+                "a": torch.randn((), device="cpu:0"),
+                "c": torch.randn((), device="cpu:1"),
+            },
+            [],
+            device="cpu:0",
+        )
+        tdt = t._call(td)
+        tdit = t._inv_call(td)
+
+        assert tdt.device is None
+        assert tdit.device is None
+
+    def test_transform_model(self):
+        t = nn.Sequential(
+            Compose(
+                DeviceCastTransform(
+                    "cpu:1",
+                    "cpu:0",
+                    in_keys=["a"],
+                    out_keys=["b"],
+                    in_keys_inv=["c"],
+                    out_keys_inv=["d"],
+                )
+            )
+        )
+        td = TensorDict(
+            {
+                "a": torch.randn((), device="cpu:0"),
+                "c": torch.randn((), device="cpu:1"),
+            },
+            [],
+            device="cpu:0",
+        )
+        tdt = t(td)
+
+        assert tdt.device is None
+
+    @pytest.mark.parametrize("rbclass", [ReplayBuffer, TensorDictReplayBuffer])
+    @pytest.mark.parametrize("storage", [LazyTensorStorage])
+    def test_transform_rb(self, rbclass, storage):
+        # we don't test casting to cuda on Memmap tensor storage since it's discouraged
+        t = Compose(
+            DeviceCastTransform(
+                "cpu:1",
+                "cpu:0",
+                in_keys=["a"],
+                out_keys=["b"],
+                in_keys_inv=["c"],
+                out_keys_inv=["d"],
+            )
+        )
+        rb = rbclass(storage=storage(max_size=20, device="auto"))
+        rb.append_transform(t)
+        td = TensorDict(
+            {
+                "a": torch.randn((), device="cpu:0"),
+                "c": torch.randn((), device="cpu:1"),
+            },
+            [],
+            device="cpu:0",
+        )
+        rb.add(td)
+        assert rb._storage._storage.device is None
+        assert rb.sample(4).device is None
+
+    def test_transform_inverse(self):
+        # Tested before
+        return
+
+
+class TestDeviceCastTransformWhole(TransformBase):
     def test_single_trans_env_check(self):
         env = ContinuousActionVecMockEnv(device="cpu:0")
         env = TransformedEnv(env, DeviceCastTransform("cpu:1"))

diff --git a/torchrl/data/replay_buffers/storages.py b/torchrl/data/replay_buffers/storages.py
@@ -927,19 +927,10 @@ def max_size_along_dim0(data_shape):
             return (self.max_size, *data_shape)
 
         if is_tensor_collection(data):
-            out = (
-                data.expand(max_size_along_dim0(data.shape))
-                .clone()
-                .zero_()
-                .to(self.device)
-            )
-        elif is_tensor_collection(data):
-            out = (
-                data.expand(max_size_along_dim0(data.shape))
-                .clone()
-                .zero_()
-                .to(self.device)
-            )
+            out = data.expand(max_size_along_dim0(data.shape))
+            out = out.clone()
+            out = out.zero_()
+            out = out.to(self.device)
         else:
             # if Tensor, we just create a MemoryMappedTensor of the desired shape, device and dtype
             out = tree_map(
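
Rendered diff view of the hunk for .github/unittest/linux_libs/scripts_pettingzoo/environment.yml, repeated from above (table columns: "Original file line number", "Diff line change"):
@@ -8,7 +8,6 @@ dependencies:
     - cloudpickle
     - gym
     - gym-notices
-    - importlib-metadata
     - six
     - zipp
     - pytest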

Rendered diff view of the hunk for .github/unittest/linux_libs/scripts_robohive/setup_env.sh, repeated from above (table columns: "Original file line number", "Diff line number", "Diff line change"):
@@ -76,4 +76,6 @@ pip install pip --upgrade
 
 conda env update --file "${this_dir}/environment.yml" --prune
 
+conda install conda-forge::ffmpeg -y
+
 pip install git+https://github.com/vikashplus/robohive@main