Feat: Support latest Jumanji version #1134

Open · wants to merge 9 commits into develop
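This PR tracks the latest Jumanji API, where `observation_spec` and `action_spec` are exposed as properties rather than methods, and the connector scenarios are registered as `Connector-v2` rather than `MaConnector-v2`. A minimal sketch of the call-site change, assuming a recent Jumanji release with property-based specs:

```python
import jumanji

# "Connector-v2" matches the scenario names in the configs below; any registered
# Jumanji environment with property-based specs behaves the same way.
env = jumanji.make("Connector-v2")

# Old API (earlier Jumanji versions): specs were methods.
# obs = env.observation_spec().generate_value()
# act = env.action_spec().generate_value()

# New API (latest Jumanji, as applied throughout this diff): specs are properties.
obs = env.observation_spec.generate_value()  # dummy observation for all agents
act = env.action_spec.generate_value()       # dummy joint action
```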
4 changes: 2 additions & 2 deletions examples/Quickstart.ipynb
@@ -537,7 +537,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {
"id": "eWjNSGvZ7ALw"
},
@@ -573,7 +573,7 @@
" )\n",
"\n",
" # Initialise observation with obs of all agents.\n",
" obs = env.observation_spec().generate_value()\n",
" obs = env.observation_spec.generate_value()\n",
" init_x = tree.map(lambda x: x[jnp.newaxis, ...], obs)\n",
"\n",
" # Initialise actor params and optimiser state.\n",
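The notebook cell above follows the same pattern used in the system files further down: pull a dummy observation from the (now property-based) spec and add a leading batch dimension before network initialisation. A small stand-alone sketch, with the actor network left as a hypothetical placeholder:

```python
import jax
import jax.numpy as jnp
import jumanji

env = jumanji.make("Connector-v2")  # stand-in for Mava's wrapped environment

# observation_spec is a property in the latest Jumanji; generate_value() gives a
# dummy observation pytree covering all agents.
obs = env.observation_spec.generate_value()

# Add a leading batch dimension, mirroring tree.map(lambda x: x[jnp.newaxis, ...], obs).
init_x = jax.tree_util.tree_map(lambda x: x[jnp.newaxis, ...], obs)

# init_x would then be fed to a (hypothetical) Flax module, e.g.:
# actor_params = actor_network.init(actor_net_key, init_x)
```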
2 changes: 1 addition & 1 deletion mava/advanced_usage/README.md
@@ -12,7 +12,7 @@ dummy_flashbax_transition = {
"observation": jnp.zeros(
(
config.system.num_agents,
- env.observation_spec().agents_view.shape[1],
+ env.observation_spec.agents_view.shape[1],
),
dtype=jnp.float32,
),
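For context, the fragment above zero-fills a per-agent observation whose width now comes from `env.observation_spec.agents_view.shape[1]` (property access; `agents_view` is a field added by Mava's wrappers). A hedged sketch with placeholder sizes standing in for the config- and spec-derived values:

```python
import jax.numpy as jnp

# Placeholders for config.system.num_agents and observation_spec.agents_view.shape[1].
num_agents, obs_dim = 4, 42

dummy_observation = jnp.zeros((num_agents, obs_dim), dtype=jnp.float32)
print(dummy_observation.shape)  # (4, 42)
```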
4 changes: 2 additions & 2 deletions mava/advanced_usage/ff_ippo_store_experience.py
@@ -378,7 +378,7 @@ def learner_setup(
)

# Initialise observation with obs of all agents.
- obs = env.observation_spec().generate_value()
+ obs = env.observation_spec.generate_value()
init_x = tree.map(lambda x: x[jnp.newaxis, ...], obs)

# Initialise actor params and optimiser state.
@@ -507,7 +507,7 @@ def run_experiment(_config: DictConfig) -> None:
"observation": jnp.zeros(
(
config.system.num_agents,
- env.observation_spec().agents_view.shape[1],
+ env.observation_spec.agents_view.shape[1],
),
dtype=jnp.float32,
),
2 changes: 1 addition & 1 deletion mava/configs/default/ff_ippo.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: ppo/ff_ippo
- network: mlp # [mlp, cnn]
- - env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax]
+ - env: rware # [cleaner, connector, vector-connector, gigastep, lbf, mabrax, matrax, rware, smax]
- _self_

hydra:
2 changes: 1 addition & 1 deletion mava/configs/default/ff_mappo.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: ppo/ff_mappo
- network: mlp # [mlp, cnn]
- - env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax]
+ - env: rware # [cleaner, connector, vector-connector, gigastep, lbf, mabrax, matrax, rware, smax]
- _self_

hydra:
2 changes: 1 addition & 1 deletion mava/configs/default/ff_sable.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: sable/ff_sable
- network: ff_retention
- - env: rware # [cleaner, connector, gigastep, lbf, rware, smax]
+ - env: rware # [cleaner, connector, vector-connector, gigastep, lbf, matrax, rware, smax]
- _self_

hydra:
2 changes: 1 addition & 1 deletion mava/configs/default/mat.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: mat/mat
- network: transformer
- - env: rware # [gigastep, lbf, mabrax, matrax, rware, smax]
+ - env: rware # [cleaner, connector, vector-connector, gigastep, lbf, mabrax, matrax, rware, smax]
- _self_

hydra:
2 changes: 1 addition & 1 deletion mava/configs/default/rec_ippo.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: ppo/rec_ippo
- network: rnn # [rnn, rcnn]
- - env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax]
+ - env: rware # [cleaner, connector, vector-connector, gigastep, lbf, mabrax, matrax, rware, smax]
- _self_

hydra:
2 changes: 1 addition & 1 deletion mava/configs/default/rec_iql.yaml
@@ -4,7 +4,7 @@ defaults:
- arch: anakin
- system: q_learning/rec_iql
- network: rnn # [rnn, rcnn]
- - env: smax # [cleaner, connector, gigastep, lbf, matrax, rware, smax]
+ - env: smax # [cleaner, connector, vector-connector, gigastep, lbf, matrax, rware, smax]

hydra:
searchpath:
2 changes: 1 addition & 1 deletion mava/configs/default/rec_mappo.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: ppo/rec_mappo
- network: rnn # [rnn, rcnn]
- - env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax]
+ - env: rware # [cleaner, connector, vector-connector, gigastep, lbf, mabrax, matrax, rware, smax]
- _self_

hydra:
2 changes: 1 addition & 1 deletion mava/configs/default/rec_sable.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: sable/rec_sable
- network: rec_retention
- - env: rware # [cleaner, connector, gigastep, lbf, rware, smax]
+ - env: rware # [cleaner, connector, vector-connector, gigastep, lbf, matrax, rware, smax]
- _self_

hydra:
5 changes: 4 additions & 1 deletion mava/configs/env/connector.yaml
@@ -4,7 +4,10 @@ defaults:
- scenario: con-5x5x3a # [con-5x5x3a, con-7x7x5a, con-10x10x10a, con-15x15x23a]
# Further environment config details in "con-10x10x5a" file.

- env_name: MaConnector # Used for logging purposes.
+ env_name: Connector # Used for logging purposes.

+ # Choose whether to aggregate the list of individual rewards and use the team reward (default setting) OR use_individual_rewards=True.
+ use_individual_rewards: False # If True, use the list of individual rewards.

# Defines the metric that will be used to evaluate the performance of the agent.
# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
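A hedged sketch of what the new `use_individual_rewards` flag selects: either the per-agent reward list is kept, or it is aggregated into a single team reward shared by all agents. The exact reduction Mava applies is an assumption here (a mean is shown; a sum is equally plausible):

```python
import jax.numpy as jnp

individual_rewards = jnp.array([0.0, 1.0, 0.0])  # hypothetical per-agent rewards

use_individual_rewards = False  # Connector default; vector-connector.yaml sets True
if use_individual_rewards:
    reward = individual_rewards  # keep the list of individual rewards
else:
    # Team reward: aggregate and broadcast the same value to every agent.
    reward = jnp.full_like(individual_rewards, jnp.mean(individual_rewards))
```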
2 changes: 1 addition & 1 deletion mava/configs/env/scenario/con-10x10x10a.yaml
@@ -1,5 +1,5 @@
# The config of the 10x10x10a scenario
- name: MaConnector-v2
+ name: Connector-v2
task_name: con-10x10x10a

task_config:
2 changes: 1 addition & 1 deletion mava/configs/env/scenario/con-15x15x23a.yaml
@@ -1,5 +1,5 @@
# The config of the 15x15x23a scenario
- name: MaConnector-v2
+ name: Connector-v2
task_name: con-15x15x23a

task_config:
2 changes: 1 addition & 1 deletion mava/configs/env/scenario/con-5x5x3a.yaml
@@ -1,5 +1,5 @@
# The config of the 5x5x3a scenario
- name: MaConnector-v2
+ name: Connector-v2
task_name: con-5x5x3a

task_config:
2 changes: 1 addition & 1 deletion mava/configs/env/scenario/con-7x7x5a.yaml
@@ -1,5 +1,5 @@
# The config of the 7x7x5a scenario
- name: MaConnector-v2
+ name: Connector-v2
task_name: con-7x7x5a

task_config:
5 changes: 4 additions & 1 deletion mava/configs/env/vector-connector.yaml
@@ -4,7 +4,10 @@ defaults:
- scenario: con-5x5x3a # [con-5x5x3a, con-7x7x5a, con-10x10x10a, con-15x15x23a]
# Further environment config details in "con-10x10x5a" file.

- env_name: VectorMaConnector # Used for logging purposes.
+ env_name: VectorConnector # Used for logging purposes.

+ # Choose whether to aggregate the list of individual rewards and use the team reward (default setting) OR use_individual_rewards=True.
+ use_individual_rewards: True # If True, use the list of individual rewards.

# Defines the metric that will be used to evaluate the performance of the agent.
# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
2 changes: 1 addition & 1 deletion mava/systems/mat/anakin/mat.py
@@ -352,7 +352,7 @@ def learner_setup(
key, actor_net_key = keys

# Initialise observation: Obs for all agents.
- init_x = env.observation_spec().generate_value()
+ init_x = env.observation_spec.generate_value()
init_x = tree.map(lambda x: x[None, ...], init_x)

_, action_space_type = get_action_head(env)
2 changes: 1 addition & 1 deletion mava/systems/ppo/anakin/ff_ippo.py
@@ -382,7 +382,7 @@ def learner_setup(
)

# Initialise observation with obs of all agents.
- obs = env.observation_spec().generate_value()
+ obs = env.observation_spec.generate_value()
init_x = tree.map(lambda x: x[jnp.newaxis, ...], obs)

# Initialise actor params and optimiser state.
2 changes: 1 addition & 1 deletion mava/systems/ppo/anakin/ff_mappo.py
@@ -366,7 +366,7 @@ def learner_setup(
)

# Initialise observation with obs of all agents.
- obs = env.observation_spec().generate_value()
+ obs = env.observation_spec.generate_value()
init_x = tree.map(lambda x: x[jnp.newaxis, ...], obs)

# Initialise actor params and optimiser state.
2 changes: 1 addition & 1 deletion mava/systems/ppo/anakin/rec_ippo.py
@@ -487,7 +487,7 @@ def learner_setup(
)

# Initialise observation with obs of all agents.
- init_obs = env.observation_spec().generate_value()
+ init_obs = env.observation_spec.generate_value()
init_obs = tree.map(
lambda x: jnp.repeat(x[jnp.newaxis, ...], config.arch.num_envs, axis=0),
init_obs,
2 changes: 1 addition & 1 deletion mava/systems/ppo/anakin/rec_mappo.py
@@ -483,7 +483,7 @@ def learner_setup(
)

# Initialise observation with obs of all agents.
- init_obs = env.observation_spec().generate_value()
+ init_obs = env.observation_spec.generate_value()
init_obs = tree.map(
lambda x: jnp.repeat(x[jnp.newaxis, ...], config.arch.num_envs, axis=0),
init_obs,
4 changes: 2 additions & 2 deletions mava/systems/q_learning/anakin/rec_iql.py
@@ -92,7 +92,7 @@ def replicate(x: Any) -> Any:
# N: Agent

# Make dummy inputs to init recurrent Q network -> need shape (T, B, N, ...)
- init_obs = env.observation_spec().generate_value() # (N, ...)
+ init_obs = env.observation_spec.generate_value() # (N, ...)
# (B, T, N, ...)
init_obs_batched = tree.map(lambda x: x[jnp.newaxis, jnp.newaxis, ...], init_obs)
init_term_or_trunc = jnp.zeros((1, 1, 1), dtype=bool) # (T, B, 1)
@@ -130,7 +130,7 @@ def replicate(x: Any) -> Any:
init_hidden_state = replicate(init_hidden_state)

# Create dummy transition
- init_acts = env.action_spec().generate_value() # (N,)
+ init_acts = env.action_spec.generate_value() # (N,)
init_transition = Transition(
obs=init_obs, # (N, ...)
action=init_acts,
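The dummy inputs above initialise the recurrent Q-network with the (T, B, N, ...) layout noted in the comments. A minimal sketch of that pattern under the property-based spec API (a plain Jumanji env stands in for Mava's wrapped one):

```python
import jax
import jax.numpy as jnp
import jumanji

env = jumanji.make("Connector-v2")  # stand-in for Mava's wrapped environment

init_obs = env.observation_spec.generate_value()  # (N, ...)
init_acts = env.action_spec.generate_value()      # (N,)

# Add leading (T=1, B=1) dims so the dummy observation matches the recurrent layout.
init_obs_batched = jax.tree_util.tree_map(
    lambda x: x[jnp.newaxis, jnp.newaxis, ...], init_obs
)
init_term_or_trunc = jnp.zeros((1, 1, 1), dtype=bool)  # (T, B, 1)
```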
6 changes: 3 additions & 3 deletions mava/systems/q_learning/anakin/rec_qmix.py
@@ -94,7 +94,7 @@ def replicate(x: Any) -> Any:
# N: Agent

# Make dummy inputs to init recurrent Q network -> need shape (T, B, N, ...)
- init_obs = env.observation_spec().generate_value() # (N, ...)
+ init_obs = env.observation_spec.generate_value() # (N, ...)
# (B, T, N, ...)
init_obs_batched = tree.map(lambda x: x[jnp.newaxis, jnp.newaxis, ...], init_obs)
init_term_or_trunc = jnp.zeros((1, 1, 1), dtype=bool) # (T, B, 1)
@@ -126,7 +126,7 @@ def replicate(x: Any) -> Any:
dtype=float,
)
global_env_state_shape = (
- env.observation_spec().generate_value().global_state[0, :].shape
+ env.observation_spec.generate_value().global_state[0, :].shape
) # NOTE: Env wrapper currently duplicates env state for each agent
dummy_global_env_state = jnp.zeros(
(
@@ -159,7 +159,7 @@ def replicate(x: Any) -> Any:
opt_state = replicate(opt_state)
init_hidden_state = replicate(init_hidden_state)

- init_acts = env.action_spec().generate_value()
+ init_acts = env.action_spec.generate_value()

# NOTE: term_or_trunc refers to the the joint done, ie. when all agents are done or when the
# episode horizon has been reached. We use this exclusively in QMIX.
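A small illustration of the joint done described in the NOTE above: the per-agent done flags collapse to a single team-level `term_or_trunc` (horizon truncation, handled by the wrappers, is not shown). Values are hypothetical:

```python
import jax.numpy as jnp

agent_dones = jnp.array([True, False, True])  # hypothetical per-agent done flags

# Joint done: True only once every agent is done.
term_or_trunc = jnp.all(agent_dones, keepdims=True)
print(term_or_trunc)  # [False]
```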
6 changes: 3 additions & 3 deletions mava/systems/sable/anakin/ff_sable.py
@@ -381,8 +381,8 @@ def learner_setup(
key, net_key = keys

# Get number of agents and actions.
- action_dim = int(env.action_spec().num_values[0])
- n_agents = env.action_spec().shape[0]
+ action_dim = int(env.action_spec.num_values[0])
+ n_agents = env.action_spec.shape[0]
config.system.num_agents = n_agents
config.system.num_actions = action_dim

@@ -419,7 +419,7 @@
)

# Get mock inputs to initialise network.
- init_obs = env.observation_spec().generate_value()
+ init_obs = env.observation_spec.generate_value()
init_obs = tree.map(lambda x: x[jnp.newaxis, ...], init_obs) # Add batch dim
init_hs = get_init_hidden_state(config.network.net_config, config.arch.num_envs)
init_hs = tree.map(lambda x: x[0, jnp.newaxis], init_hs)
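The agent and action counts above are now read straight off the `action_spec` property; for these scenarios it is a multi-discrete spec of shape `(num_agents,)`, so `num_values[0]` is the per-agent action dimension. A short sketch (the SAC systems further down build buffer shapes from the same attribute):

```python
import jumanji

env = jumanji.make("Connector-v2")  # stand-in for Mava's wrapped environment

action_dim = int(env.action_spec.num_values[0])  # per-agent number of actions
n_agents = env.action_spec.shape[0]              # number of agents

# e.g. the SAC systems below build buffer shapes the same way:
# full_action_shape = (cfg.arch.num_envs, *env.action_spec.shape)
print(action_dim, n_agents)
```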
6 changes: 3 additions & 3 deletions mava/systems/sable/anakin/rec_sable.py
@@ -412,8 +412,8 @@ def learner_setup(
key, net_key = keys

# Get number of agents and actions.
- action_dim = int(env.action_spec().num_values[0])
- n_agents = env.action_spec().shape[0]
+ action_dim = int(env.action_spec.num_values[0])
+ n_agents = env.action_spec.shape[0]
config.system.num_agents = n_agents
config.system.num_actions = action_dim

@@ -445,7 +445,7 @@
)

# Get mock inputs to initialise network.
- init_obs = env.observation_spec().generate_value()
+ init_obs = env.observation_spec.generate_value()
init_obs = tree.map(lambda x: x[jnp.newaxis, ...], init_obs) # Add batch dim
init_hs = get_init_hidden_state(config.network.net_config, config.arch.num_envs)
init_hs = tree.map(lambda x: x[0, jnp.newaxis], init_hs)
6 changes: 3 additions & 3 deletions mava/systems/sac/anakin/ff_hasac.py
@@ -144,11 +144,11 @@ def replicate(x: Any) -> Any:
key, actor_key, q1_key, q2_key, q1_target_key, q2_target_key = jax.random.split(key, 6)
actor_keys = jax.random.split(actor_key, n_agents)

- acts = env.action_spec().generate_value() # all agents actions
+ acts = env.action_spec.generate_value() # all agents actions
act_single = acts[0] # single agents action
concat_acts = jnp.concatenate([act_single for _ in range(n_agents)], axis=0)
concat_acts_batched = concat_acts[jnp.newaxis, ...] # batch + concat of all agents actions
- obs = env.observation_spec().generate_value()
+ obs = env.observation_spec.generate_value()
obs_single_batched = tree.map(lambda x: x[0][jnp.newaxis, ...], obs)

# Making actor network
@@ -285,7 +285,7 @@ def make_update_fns(
actor_net, q_net = networks
actor_opt, q_opt, alpha_opt = optims

- full_action_shape = (cfg.arch.num_envs, *env.action_spec().shape)
+ full_action_shape = (cfg.arch.num_envs, *env.action_spec.shape)

# losses:
def q_loss_fn(
6 changes: 3 additions & 3 deletions mava/systems/sac/anakin/ff_isac.py
@@ -104,9 +104,9 @@ def replicate(x: Any) -> Any:

key, actor_key, q1_key, q2_key, q1_target_key, q2_target_key = jax.random.split(key, 6)

- acts = env.action_spec().generate_value() # all agents actions
+ acts = env.action_spec.generate_value() # all agents actions
act_single_batched = acts[0][jnp.newaxis, ...] # batch single agent action
- obs = env.observation_spec().generate_value()
+ obs = env.observation_spec.generate_value()
obs_single_batched = tree.map(lambda x: x[0][jnp.newaxis, ...], obs)

# Making actor network
@@ -242,7 +242,7 @@ def make_update_fns(
actor_net, q_net = networks
actor_opt, q_opt, alpha_opt = optims

- full_action_shape = (cfg.arch.num_envs, *env.action_spec().shape)
+ full_action_shape = (cfg.arch.num_envs, *env.action_spec.shape)

# losses:
def q_loss_fn(
6 changes: 3 additions & 3 deletions mava/systems/sac/anakin/ff_masac.py
@@ -105,11 +105,11 @@ def replicate(x: Any) -> Any:

key, actor_key, q1_key, q2_key, q1_target_key, q2_target_key = jax.random.split(key, 6)

- acts = env.action_spec().generate_value() # all agents actions
+ acts = env.action_spec.generate_value() # all agents actions
act_single = acts[0] # single agents action
joint_acts = jnp.concatenate([act_single for _ in range(n_agents)], axis=0)
joint_acts_batched = joint_acts[jnp.newaxis, ...] # joint actions with a batch dim
- obs = env.observation_spec().generate_value()
+ obs = env.observation_spec.generate_value()
obs_single_batched = tree.map(lambda x: x[0][jnp.newaxis, ...], obs)

# Making actor network
@@ -245,7 +245,7 @@ def make_update_fns(
actor_net, q_net = networks
actor_opt, q_opt, alpha_opt = optims

- full_action_shape = (cfg.arch.num_envs, *env.action_spec().shape)
+ full_action_shape = (cfg.arch.num_envs, *env.action_spec.shape)

# losses:
def q_loss_fn(