diff --git a/gymnasium_robotics/envs/maze/ant_maze_v5.py b/gymnasium_robotics/envs/maze/ant_maze_v5.py
index aa443833..1635cee1 100644
--- a/gymnasium_robotics/envs/maze/ant_maze_v5.py
+++ b/gymnasium_robotics/envs/maze/ant_maze_v5.py
@@ -1,4 +1,4 @@
-"""A maze environment with the Gymnasium Ant agent (https://github.com/Farama-Foundation/Gymnasium/blob/main/gymnasium/envs/mujoco/ant_v4.py).
+"""A maze environment with the Gymnasium Ant agent (https://github.com/Farama-Foundation/Gymnasium/blob/main/gymnasium/envs/mujoco/ant_v5.py).
 
 The code is inspired by the D4RL repository hosted on GitHub (https://github.com/Farama-Foundation/D4RL), published in the paper
 'D4RL: Datasets for Deep Data-Driven Reinforcement Learning' by Justin Fu, Aviral Kumar, Ofir Nachum, George Tucker, Sergey Levine.
@@ -40,22 +40,22 @@ class AntMazeEnv(MazeEnv, EzPickle):
     #### Maze size
     The map variations for the mazes are the same as for `PointMaze`. The ant environments with fixed goal and reset locations are the following:
 
-    * `AntMaze_UMaze-v4`
-    * `AntMaze_BigMaze-v4`
-    * `AntMaze_HardestMaze-v4`
+    * `AntMaze_UMaze-v5`
+    * `AntMaze_BigMaze-v5`
+    * `AntMaze_HardestMaze-v5`
 
     #### Diverse goal mazes
     The environments with fixed reset position for the ant and randomly selected goals, also known as diverse goal, are:
 
-    * `AntMaze_BigMaze_DG-v4`
-    * `AntMaze_HardestMaze_DG-v4`
+    * `AntMaze_BigMaze_DG-v5`
+    * `AntMaze_HardestMaze_DG-v5`
 
     #### Diverse goal and reset mazes
     Finally, the environments that select the reset and goal locations randomly are:
 
-    * `AntMaze_BigMaze_DGR-v4`
-    * `AntMaze_HardestMaze_DGR-v4`
+    * `AntMaze_BigMaze_DGR-v5`
+    * `AntMaze_HardestMaze_DGR-v5`
 
     #### Custom maze
     Also, any of the `AntMaze` environments can be initialized with a custom maze map by setting the `maze_map` argument like follows:
@@ -70,7 +70,7 @@ class AntMazeEnv(MazeEnv, EzPickle):
                    [1, C, 0, C, 1],
                    [1, 1, 1, 1, 1]]
 
-    env = gym.make('AntMaze_UMaze-v4', maze_map=example_map)
+    env = gym.make('AntMaze_UMaze-v5', maze_map=example_map)
     ```
 
     ### Action Space
@@ -153,8 +153,8 @@ class AntMazeEnv(MazeEnv, EzPickle):
     - *sparse*: the returned reward can have two values: `0` if the ant hasn't reached its final target position, and `1` if the ant is in the final target position (the ant is considered to have reached the goal if the Euclidean distance between both is lower than 0.5 m).
     - *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.
 
-    To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `AntMaze_UMaze-v4`. However, for `dense`
-    reward the id must be modified to `AntMaze_UMazeDense-v4` and initialized as follows:
+    To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `AntMaze_UMaze-v5`. However, for `dense`
+    reward the id must be modified to `AntMaze_UMazeDense-v5` and initialized as follows:
 
     ```python
     import gymnasium as gym
@@ -197,7 +197,7 @@ class AntMazeEnv(MazeEnv, EzPickle):
 
     gym.register_envs(gymnasium_robotics)
 
-    env = gym.make('AntMaze_UMaze-v4', max_episode_steps=100)
+    env = gym.make('AntMaze_UMaze-v5', max_episode_steps=100)
     ```
 
     ### Version History
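A quick way to sanity-check the renamed ids after this patch is a smoke test along these lines (a minimal sketch, assuming `gymnasium` and a `gymnasium-robotics` build containing this change are installed; the random-action rollout is illustrative and not part of the patch):

```python
# Smoke test for the bumped v5 ids (sketch, not part of the diff above).
import gymnasium as gym
import gymnasium_robotics

gym.register_envs(gymnasium_robotics)

# Same call the docstring shows, with the new version suffix.
env = gym.make('AntMaze_UMaze-v5', max_episode_steps=100)

obs, info = env.reset(seed=0)
for _ in range(10):
    # Random actions are enough to confirm the environment registers and steps.
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    if terminated or truncated:
        obs, info = env.reset()
env.close()
```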