diff --git a/benchmarl/conf/experiment/base_experiment.yaml b/benchmarl/conf/experiment/base_experiment.yaml
index bb6f6d94..89566850 100644
--- a/benchmarl/conf/experiment/base_experiment.yaml
+++ b/benchmarl/conf/experiment/base_experiment.yaml
@@ -82,7 +82,7 @@ render: True
 evaluation_interval: 120_000
 # Number of episodes that evaluation is run on
 evaluation_episodes: 10
-# If True, when stochastic policies are evaluated, their mode is taken, otherwise, if False, they are sampled
+# If True, when stochastic policies are evaluated, their deterministic value is taken; otherwise they are sampled
 evaluation_deterministic_actions: True
 
 # List of loggers to use, options are: wandb, csv, tensorboard, mflow
diff --git a/benchmarl/experiment/experiment.py b/benchmarl/experiment/experiment.py
index 90ef568b..b09a7abc 100644
--- a/benchmarl/experiment/experiment.py
+++ b/benchmarl/experiment/experiment.py
@@ -582,7 +582,12 @@ def _collection_loop(self):
                     auto_reset=False,
                     tensordict=reset_batch,
                 )
-                reset_batch = step_mdp(batch[..., -1])
+                reset_batch = step_mdp(
+                    batch[..., -1],
+                    reward_keys=self.rollout_env.reward_keys,
+                    action_keys=self.rollout_env.action_keys,
+                    done_keys=self.rollout_env.done_keys,
+                )
 
             # Logging collection
             collection_time = time.time() - iteration_start
diff --git a/test/test_vmas.py b/test/test_vmas.py
index 3dfc4f81..9a6eb6cc 100644
--- a/test/test_vmas.py
+++ b/test/test_vmas.py
@@ -74,6 +74,24 @@ def test_all_tasks(
         )
         experiment.run()
 
+    def test_collect_with_grad(
+        self,
+        experiment_config,
+        mlp_sequence_config,
+        algo_config: AlgorithmConfig = IppoConfig,
+        task: Task = VmasTask.BALANCE,
+    ):
+        task = task.get_from_yaml()
+        experiment_config.collect_with_grad = True
+        experiment = Experiment(
+            algorithm_config=algo_config.get_from_yaml(),
+            model_config=mlp_sequence_config,
+            seed=0,
+            config=experiment_config,
+            task=task,
+        )
+        experiment.run()
+
    @pytest.mark.parametrize(
        "algo_config", [IppoConfig, QmixConfig, IsacConfig, IddpgConfig]
    )
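
A minimal sketch (not part of the patch) of why the explicit keys matter for the `step_mdp` change above: multi-agent environments in torchrl expose nested reward/action/done keys (e.g. `("agents", "reward")`), and passing the env's own `reward_keys` / `action_keys` / `done_keys` lets `step_mdp` strip and carry over the right entries instead of assuming the single-agent defaults. The scenario name, sizes, and key layout below are assumptions for illustration only.

```python
# Illustrative sketch, assuming torchrl and vmas are installed; the scenario
# name and sizes are assumptions, not part of this patch.
from torchrl.envs.libs.vmas import VmasEnv
from torchrl.envs.utils import step_mdp

env = VmasEnv(scenario="balance", num_envs=4, n_agents=3)
rollout = env.rollout(max_steps=8)  # TensorDict with a trailing time dimension

print(env.reward_keys)  # nested keys such as [("agents", "reward")]

# Take the last step of the rollout and promote its "next" entries to the
# root of a fresh tensordict, using the env's nested keys so rewards and
# actions are excluded and done flags are carried over correctly.
reset_batch = step_mdp(
    rollout[..., -1],
    reward_keys=env.reward_keys,
    action_keys=env.action_keys,
    done_keys=env.done_keys,
)
```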