Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Add Stack transform #2567

Merged
merged 1 commit into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ conda deactivate && conda activate ./env
python -c "import mlagents_envs"

python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 200 --capture no -k TestUnityMLAgents --runslow
python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/test_transforms.py --instafail -v --durations 200 --capture no -k test_transform_env[unity]

coverage combine
coverage xml -i
157 changes: 155 additions & 2 deletions test/mocking_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from typing import Optional
from typing import Dict, List, Optional

import torch
import torch.nn as nn
Expand All @@ -24,7 +24,12 @@
from torchrl.data.utils import consolidate_spec
from torchrl.envs.common import EnvBase
from torchrl.envs.model_based.common import ModelBasedEnvBase
from torchrl.envs.utils import _terminated_or_truncated
from torchrl.envs.utils import (
_terminated_or_truncated,
check_marl_grouping,
MarlGroupMapType,
)


spec_dict = {
"bounded": Bounded,
Expand Down Expand Up @@ -1059,6 +1064,154 @@ def _step(
return tensordict


class MultiAgentCountingEnv(EnvBase):
"""A multi-agent env that is done after a given number of steps.

All agents have identical specs.

The count is incremented by 1 on each step.

"""

def __init__(
self,
n_agents: int,
group_map: MarlGroupMapType
| Dict[str, List[str]] = MarlGroupMapType.ALL_IN_ONE_GROUP,
max_steps: int = 5,
start_val: int = 0,
**kwargs,
):
super().__init__(**kwargs)
self.max_steps = max_steps
self.start_val = start_val
self.n_agents = n_agents
self.agent_names = [f"agent_{idx}" for idx in range(n_agents)]

if isinstance(group_map, MarlGroupMapType):
group_map = group_map.get_group_map(self.agent_names)
check_marl_grouping(group_map, self.agent_names)

self.group_map = group_map

observation_specs = {}
reward_specs = {}
done_specs = {}
action_specs = {}

for group_name, agents in group_map.items():
observation_specs[group_name] = {}
reward_specs[group_name] = {}
done_specs[group_name] = {}
action_specs[group_name] = {}

for agent_name in agents:
observation_specs[group_name][agent_name] = Composite(
observation=Unbounded(
(
*self.batch_size,
3,
4,
),
dtype=torch.float32,
device=self.device,
),
shape=self.batch_size,
device=self.device,
)
reward_specs[group_name][agent_name] = Composite(
reward=Unbounded(
(
*self.batch_size,
1,
),
device=self.device,
),
shape=self.batch_size,
device=self.device,
)
done_specs[group_name][agent_name] = Composite(
done=Categorical(
2,
dtype=torch.bool,
shape=(
*self.batch_size,
1,
),
device=self.device,
),
shape=self.batch_size,
device=self.device,
)
action_specs[group_name][agent_name] = Composite(
action=Binary(n=1, shape=[*self.batch_size, 1], device=self.device),
shape=self.batch_size,
device=self.device,
)

self.observation_spec = Composite(observation_specs)
self.reward_spec = Composite(reward_specs)
self.done_spec = Composite(done_specs)
self.action_spec = Composite(action_specs)
self.register_buffer(
"count",
torch.zeros((*self.batch_size, 1), device=self.device, dtype=torch.int),
)

def _set_seed(self, seed: Optional[int]):
torch.manual_seed(seed)

def _reset(self, tensordict: TensorDictBase, **kwargs) -> TensorDictBase:
if tensordict is not None and "_reset" in tensordict.keys():
_reset = tensordict.get("_reset")
self.count[_reset] = self.start_val
else:
self.count[:] = self.start_val

source = {}
for group_name, agents in self.group_map.items():
source[group_name] = {}
for agent_name in agents:
source[group_name][agent_name] = TensorDict(
source={
"observation": torch.rand(
(*self.batch_size, 3, 4), device=self.device
),
"done": self.count > self.max_steps,
"terminated": self.count > self.max_steps,
},
batch_size=self.batch_size,
device=self.device,
)

tensordict = TensorDict(source, batch_size=self.batch_size, device=self.device)
return tensordict

def _step(
self,
tensordict: TensorDictBase,
) -> TensorDictBase:
self.count += 1
source = {}
for group_name, agents in self.group_map.items():
source[group_name] = {}
for agent_name in agents:
source[group_name][agent_name] = TensorDict(
source={
"observation": torch.rand(
(*self.batch_size, 3, 4), device=self.device
),
"done": self.count > self.max_steps,
"terminated": self.count > self.max_steps,
"reward": torch.zeros_like(self.count, dtype=torch.float),
},
batch_size=self.batch_size,
device=self.device,
)
tensordict = TensorDict(source, batch_size=self.batch_size, device=self.device)
return tensordict


class IncrementingEnv(CountingEnv):
# Same as CountingEnv but always increments the count by 1 regardless of the action.
def _step(
Expand Down
Loading
Loading