diff --git a/gymnasium_robotics/envs/maze/ant_maze_v5.py b/gymnasium_robotics/envs/maze/ant_maze_v5.py index ec314c95..edddb120 100644 --- a/gymnasium_robotics/envs/maze/ant_maze_v5.py +++ b/gymnasium_robotics/envs/maze/ant_maze_v5.py @@ -26,187 +26,6 @@ class AntMazeEnv(MazeEnv, EzPickle): - """ - ### Description - - This environment was refactored from the [D4RL](https://github.com/Farama-Foundation/D4RL) repository, introduced by Justin Fu, Aviral Kumar, Ofir Nachum, George Tucker, and Sergey Levine - in ["D4RL: Datasets for Deep Data-Driven Reinforcement Learning"](https://arxiv.org/abs/2004.07219). - - The tasks found in the `AntMaze` environments are the same as the ones in the `PointMaze` environments. However, in this case the agent is the Ant quadruped from the main [Gymnaisum](https://gymnasium.farama.org/environments/mujoco/ant/) repository. - The control frequency of the ant is of `f = 20 Hz`. Each simulation timestep is of `dt=0.01` and the ant robot repeats the same action for 5 simulation steps. - - ### Maze Variations - - #### Maze size - The map variations for the mazes are the same as for `PointMaze`. The ant environments with fixed goal and reset locations are the following: - - * `AntMaze_UMaze-v5` - * `AntMaze_BigMaze-v5` - * `AntMaze_HardestMaze-v5` - - #### Diverse goal mazes - The environments with fixed reset position for the ant and randomly selected goals, also known as diverse goal, are: - - * `AntMaze_BigMaze_DG-v5` - * `AntMaze_HardestMaze_DG-v5` - - #### Diverse goal and reset mazes - - Finally, the environments that select the reset and goal locations randomly are: - - * `AntMaze_BigMaze_DGR-v5` - * `AntMaze_HardestMaze_DGR-v5` - - #### Custom maze - Also, any of the `AntMaze` environments can be initialized with a custom maze map by setting the `maze_map` argument like follows: - - ```python - import gymnasium as gym - import gymnasium_robotics - - gym.register_envs(gymnasium_robotics) - - example_map = [[1, 1, 1, 1, 1], - [1, C, 0, C, 1], - [1, 1, 1, 1, 1]] - - env = gym.make('AntMaze_UMaze-v5', maze_map=example_map) - ``` - - ### Action Space - The action space is the action space of [Gymnasium/MuJoCo/Ant](https://gymnasium.farama.org/environments/mujoco/ant/#action-space): - - The action space is a `Box(-1, 1, (8,), float32)`. An action represents the torques applied at the hinge joints. 
- - | Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Type (Unit) | - | --- | ----------------------------------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ | - | 0 | Torque applied on the rotor between the torso and back right hip | -1 | 1 | hip_4 (right_back_leg) | hinge | torque (N m) | - | 1 | Torque applied on the rotor between the back right two links | -1 | 1 | angle_4 (right_back_leg) | hinge | torque (N m) | - | 2 | Torque applied on the rotor between the torso and front left hip | -1 | 1 | hip_1 (front_left_leg) | hinge | torque (N m) | - | 3 | Torque applied on the rotor between the front left two links | -1 | 1 | angle_1 (front_left_leg) | hinge | torque (N m) | - | 4 | Torque applied on the rotor between the torso and front right hip | -1 | 1 | hip_2 (front_right_leg) | hinge | torque (N m) | - | 5 | Torque applied on the rotor between the front right two links | -1 | 1 | angle_2 (front_right_leg) | hinge | torque (N m) | - | 6 | Torque applied on the rotor between the torso and back left hip | -1 | 1 | hip_3 (back_leg) | hinge | torque (N m) | - | 7 | Torque applied on the rotor between the back left two links | -1 | 1 | angle_3 (back_leg) | hinge | torque (N m) | - - ### Observation Space - The observation is a `goal-aware observation space`. It consists of a dictionary with information about the robot's position and goal. The dictionary consists of the following 3 keys: - - * `observation`: Observations consist of positional values of different body parts of the ant, followed by the velocities of those individual parts (their derivatives) with all - the positions ordered before all the velocities. - - By default, observations do not include the x- and y-coordinates of the ant's torso. These values are included in the `achieved_goal` key of the observation. - However, by default, an observation is a `ndarray` with shape `(111,)` if the external contact forces are included with the `use_contact_forces` arguments. 
Otherwise, the shape will be `(27, )` - The elements of the array correspond to the following: - - | Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit | - |-----|--------------------------------------------------------------|--------|--------|----------------------------------------|-------|--------------------------| - | 0 | z-coordinate of the torso (centre) | -Inf | Inf | torso | free | position (m) | - | 1 | x-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) | - | 2 | y-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) | - | 3 | z-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) | - | 4 | w-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) | - | 5 | angle between torso and first link on front left | -Inf | Inf | hip_1 (front_left_leg) | hinge | angle (rad) | - | 6 | angle between the two links on the front left | -Inf | Inf | ankle_1 (front_left_leg) | hinge | angle (rad) | - | 7 | angle between torso and first link on front right | -Inf | Inf | hip_2 (front_right_leg) | hinge | angle (rad) | - | 8 | angle between the two links on the front right | -Inf | Inf | ankle_2 (front_right_leg) | hinge | angle (rad) | - | 9 | angle between torso and first link on back left | -Inf | Inf | hip_3 (back_leg) | hinge | angle (rad) | - | 10 | angle between the two links on the back left | -Inf | Inf | ankle_3 (back_leg) | hinge | angle (rad) | - | 11 | angle between torso and first link on back right | -Inf | Inf | hip_4 (right_back_leg) | hinge | angle (rad) | - | 12 | angle between the two links on the back right | -Inf | Inf | ankle_4 (right_back_leg) | hinge | angle (rad) | - | 13 | x-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) | - | 14 | y-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) | - | 15 | z-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) | - | 16 | x-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) | - | 17 | y-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) | - | 18 | z-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) | - | 19 | angular velocity of angle between torso and front left link | -Inf | Inf | hip_1 (front_left_leg) | hinge | angle (rad) | - | 20 | angular velocity of the angle between front left links | -Inf | Inf | ankle_1 (front_left_leg) | hinge | angle (rad) | - | 21 | angular velocity of angle between torso and front right link | -Inf | Inf | hip_2 (front_right_leg) | hinge | angle (rad) | - | 22 | angular velocity of the angle between front right links | -Inf | Inf | ankle_2 (front_right_leg) | hinge | angle (rad) | - | 23 | angular velocity of angle between torso and back left link | -Inf | Inf | hip_3 (back_leg) | hinge | angle (rad) | - | 24 | angular velocity of the angle between back left links | -Inf | Inf | ankle_3 (back_leg) | hinge | angle (rad) | - | 25 | angular velocity of angle between torso and back right link | -Inf | Inf | hip_4 (right_back_leg) | hinge | angle (rad) | - | 26 |angular velocity of the angle between back right links | -Inf | Inf | ankle_4 (right_back_leg) | hinge | angle (rad) | - - The remaining 14*6 = 84 elements of the observation are contact forces (external forces - force x, y, z and torque x, y, z) applied to the center of mass of each of the links. 
The 14 links are: the ground link, - the torso link, and 3 links for each leg (1 + 1 + 12) with the 6 external forces. These elements are included only if at the environments initialization the argument `use_contact_forces` is set to `True`. - - * `desired_goal`: this key represents the final goal to be achieved. In this environment it is a 2-dimensional `ndarray`, `(2,)`, that consists of the two cartesian coordinates of the desired final ant torso position `[x,y]`. The elements of the array are the following: - - | Num | Observation | Min | Max | Site Name (in corresponding XML file) |Unit | - |-----|------------------------ |--------|--------|---------------------------------------|--------------| - | 0 | Final goal x coordinate | -Inf | Inf | target | position (m) | - | 1 | Final goal y coordinate | -Inf | Inf | target | position (m) | - - * `achieved_goal`: this key represents the current state of the ant's torso, as if it would have achieved a goal. This is useful for goal orientated learning algorithms such as those that use [Hindsight Experience Replay](https://arxiv.org/abs/1707.01495) (HER). - The value is an `ndarray` with shape `(2,)`. The elements of the array are the following: - - | Num | Observation | Min | Max | Site Name (in corresponding XML file) |Unit | - |-----|------------------------------------------------|--------|--------|---------------------------------------|--------------| - | 0 | Current goal ant position in the x coordinate | -Inf | Inf | torso | position (m) | - | 1 | Current goal ant position in the y coordinate | -Inf | Inf | torso | position (m) | - - ### Rewards - - The reward can be initialized as `sparse` or `dense`: - - *sparse*: the returned reward can have two values: `0` if the ant hasn't reached its final target position, and `1` if the ant is in the final target position (the ant is considered to have reached the goal if the Euclidean distance between both is lower than 0.5 m). - - *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal. - - To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `AntMaze_UMaze-v5`. However, for `dense` - reward the id must be modified to `AntMaze_UMazeDense-v5` and initialized as follows: - - ```python - import gymnasium as gym - import gymnasium_robotics - - gym.register_envs(gymnasium_robotics) - - env = gym.make('AntMaze_UMaze-v5') - ``` - - ### Starting State - The goal and initial placement of the ant in the maze follows the same structure for all environments. A discrete cell `(i,j)` is selected for the goal and agent's initial position as previously menitoned in the **Maze** section. - Then this cell index is converted to its cell center as an `(x,y)` continuous Cartesian coordinates in the MuJoCo simulation. Finally, a sampled noise from a uniform distribution with range `[-0.25,0.25]m` is added to the - cell's center x and y coordinates. This allows to create a richer goal distribution. - - The goal and initial position of the agent can also be specified by the user when the episode is reset. This is done by passing the dictionary argument `options` to the gymnasium reset() function. This dictionary expects one or both of - the following keys: - - * `goal_cell`: `numpy.ndarray, shape=(2,0), type=int` - Specifies the desired `(i,j)` cell location of the goal. 
A uniform sampled noise will be added to the continuous coordinates of the center of the cell. - * `reset_cell`: `numpy.ndarray, shape=(2,0), type=int` - Specifies the desired `(i,j)` cell location of the reset initial agent position. A uniform sampled noise will be added to the continuous coordinates of the center of the cell. - - ### Episode End - * `truncated` - The episode will be `truncated` when the duration reaches a total of `max_episode_steps`. - * `terminated` - The task can be set to be continuing with the `continuing_task` argument. In this case the episode will never terminate, instead the goal location is randomly selected again. If the task is set not to be continuing the - episode will be terminated when the Euclidean distance to the goal is less or equal to 0.5. - - ### Arguments - * `maze_map` - Optional argument to initialize the environment with a custom maze map. - * `continuing_task` - If set to `True` the episode won't be terminated when reaching the goal, instead a new goal location will be generated (unless `reset_target` argument is `True`). If `False` the environment is terminated when the ant reaches the final goal. - * `reset_target` - If set to `True` and the argument `continuing_task` is also `True`, when the ant reaches the target goal the location of the goal will be kept the same and no new goal location will be generated. If `False` a new goal will be generated when reached. - * `xml_file` - Optional argument to Path of robot model. - * Optionally any other [Gymnasium/MuJoCo/Ant](https://gymnasium.farama.org/environments/mujoco/ant/#arguments/) argument such `ctrl_cost_weight`. - - Note that, the maximum number of timesteps before the episode is `truncated` can be increased or decreased by specifying the `max_episode_steps` argument at initialization. For example, - to increase the total number of timesteps to 100 make the environment as follows: - - ```python - import gymnasium as gym - import gymnasium_robotics - - gym.register_envs(gymnasium_robotics) - - env = gym.make('AntMaze_UMaze-v5', max_episode_steps=100) - ``` - - ### Version History - - v5: Is now based on `Gymnasium/MuJoCoAnt-v5/`, and inherits all features from it such as the `xml_file` argument for the loading of third party model. - - v4: Refactor compute_terminated in MazeEnv into a pure function compute_terminated and a new function update_goal which resets the goal position. Ant bug fix: Reward is now computed before reset (i.e. sparse reward is not always zero). Maze bug fix: Ant can no longer reset within the goal radius 0.45 due to maze_size_scaling factor missing in MazeEnv. info['success'] key added. - - v3: refactor version of the D4RL environment, also create dependency on newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind. - - v2 & v1: legacy versions in the [D4RL](https://github.com/Farama-Foundation/D4RL). - """ - metadata = { "render_modes": [ "human", diff --git a/gymnasium_robotics/envs/maze/ant_maze_v6.py b/gymnasium_robotics/envs/maze/ant_maze_v6.py new file mode 100644 index 00000000..4e415574 --- /dev/null +++ b/gymnasium_robotics/envs/maze/ant_maze_v6.py @@ -0,0 +1,339 @@ +"""A maze environment with the Gymnasium Ant agent (https://github.com/Farama-Foundation/Gymnasium/blob/main/gymnasium/envs/mujoco/ant_v5.py). 
+
+The code is inspired by the D4RL repository hosted on GitHub (https://github.com/Farama-Foundation/D4RL), published in the paper
+'D4RL: Datasets for Deep Data-Driven Reinforcement Learning' by Justin Fu, Aviral Kumar, Ofir Nachum, George Tucker, Sergey Levine.
+
+Original Author of the code: Justin Fu
+
+The modifications made involve reusing the code in Gymnasium for the Ant environment and in `point_maze/maze_env.py`.
+The new code also follows the Gymnasium API and the Multi-goal API.
+
+This project is covered by the Apache 2.0 License.
+"""
+
+import sys
+from os import path
+from typing import Dict, List, Optional, Union
+
+import numpy as np
+from gymnasium import spaces
+from gymnasium.envs.mujoco.ant_v5 import AntEnv
+from gymnasium.utils.ezpickle import EzPickle
+
+from gymnasium_robotics.envs.maze.maps import U_MAZE
+from gymnasium_robotics.envs.maze.maze_v6 import MazeEnv
+from gymnasium_robotics.utils.mujoco_utils import MujocoModelNames
+
+
+class AntMazeEnv(MazeEnv, EzPickle):
+    """
+    ### Description
+
+    This environment was refactored from the [D4RL](https://github.com/Farama-Foundation/D4RL) repository, introduced by Justin Fu, Aviral Kumar, Ofir Nachum, George Tucker, and Sergey Levine
+    in ["D4RL: Datasets for Deep Data-Driven Reinforcement Learning"](https://arxiv.org/abs/2004.07219).
+
+    The tasks found in the `AntMaze` environments are the same as the ones in the `PointMaze` environments. However, in this case the agent is the Ant quadruped from the main [Gymnasium](https://gymnasium.farama.org/environments/mujoco/ant/) repository.
+    The control frequency of the ant is `f = 20 Hz`: each simulation timestep lasts `dt = 0.01` s and the ant robot repeats the same action for 5 simulation steps.
+
+    ### Maze Variations
+
+    #### Maze size
+    The map variations for the mazes are the same as for `PointMaze`. The ant environments with fixed goal and reset locations are the following:
+
+    * `AntMaze_UMaze-v5`
+    * `AntMaze_BigMaze-v5`
+    * `AntMaze_HardestMaze-v5`
+
+    #### Diverse goal mazes
+    The environments with a fixed reset position for the ant and randomly selected goals, also known as diverse goal, are:
+
+    * `AntMaze_BigMaze_DG-v5`
+    * `AntMaze_HardestMaze_DG-v5`
+
+    #### Diverse goal and reset mazes
+
+    Finally, the environments that select the reset and goal locations randomly are:
+
+    * `AntMaze_BigMaze_DGR-v5`
+    * `AntMaze_HardestMaze_DGR-v5`
+
+    #### Custom maze
+    Additionally, any of the `AntMaze` environments can be initialized with a custom maze map by setting the `maze_map` argument as follows:
+
+    ```python
+    import gymnasium as gym
+    import gymnasium_robotics
+
+    gym.register_envs(gymnasium_robotics)
+
+    example_map = [[1, 1, 1, 1, 1],
+                   [1, "c", 0, "c", 1],
+                   [1, 1, 1, 1, 1]]
+
+    env = gym.make('AntMaze_UMaze-v5', maze_map=example_map)
+    ```
+
+    ### Action Space
+    The action space is the action space of [Gymnasium/MuJoCo/Ant](https://gymnasium.farama.org/environments/mujoco/ant/#action-space):
+
+    The action space is a `Box(-1, 1, (8,), float32)`. An action represents the torques applied at the hinge joints.
+ + | Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Type (Unit) | + | --- | ----------------------------------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ | + | 0 | Torque applied on the rotor between the torso and back right hip | -1 | 1 | hip_4 (right_back_leg) | hinge | torque (N m) | + | 1 | Torque applied on the rotor between the back right two links | -1 | 1 | angle_4 (right_back_leg) | hinge | torque (N m) | + | 2 | Torque applied on the rotor between the torso and front left hip | -1 | 1 | hip_1 (front_left_leg) | hinge | torque (N m) | + | 3 | Torque applied on the rotor between the front left two links | -1 | 1 | angle_1 (front_left_leg) | hinge | torque (N m) | + | 4 | Torque applied on the rotor between the torso and front right hip | -1 | 1 | hip_2 (front_right_leg) | hinge | torque (N m) | + | 5 | Torque applied on the rotor between the front right two links | -1 | 1 | angle_2 (front_right_leg) | hinge | torque (N m) | + | 6 | Torque applied on the rotor between the torso and back left hip | -1 | 1 | hip_3 (back_leg) | hinge | torque (N m) | + | 7 | Torque applied on the rotor between the back left two links | -1 | 1 | angle_3 (back_leg) | hinge | torque (N m) | + + ### Observation Space + The observation is a `goal-aware observation space`. It consists of a dictionary with information about the robot's position and goal. The dictionary consists of the following 3 keys: + + * `observation`: Observations consist of positional values of different body parts of the ant, followed by the velocities of those individual parts (their derivatives) with all + the positions ordered before all the velocities. + + By default, observations do not include the x- and y-coordinates of the ant's torso. These values are included in the `achieved_goal` key of the observation. + However, by default, an observation is a `ndarray` with shape `(111,)` if the external contact forces are included with the `use_contact_forces` arguments. 
Otherwise, the shape will be `(27, )` + The elements of the array correspond to the following: + + | Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit | + |-----|--------------------------------------------------------------|--------|--------|----------------------------------------|-------|--------------------------| + | 0 | z-coordinate of the torso (centre) | -Inf | Inf | torso | free | position (m) | + | 1 | x-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) | + | 2 | y-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) | + | 3 | z-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) | + | 4 | w-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) | + | 5 | angle between torso and first link on front left | -Inf | Inf | hip_1 (front_left_leg) | hinge | angle (rad) | + | 6 | angle between the two links on the front left | -Inf | Inf | ankle_1 (front_left_leg) | hinge | angle (rad) | + | 7 | angle between torso and first link on front right | -Inf | Inf | hip_2 (front_right_leg) | hinge | angle (rad) | + | 8 | angle between the two links on the front right | -Inf | Inf | ankle_2 (front_right_leg) | hinge | angle (rad) | + | 9 | angle between torso and first link on back left | -Inf | Inf | hip_3 (back_leg) | hinge | angle (rad) | + | 10 | angle between the two links on the back left | -Inf | Inf | ankle_3 (back_leg) | hinge | angle (rad) | + | 11 | angle between torso and first link on back right | -Inf | Inf | hip_4 (right_back_leg) | hinge | angle (rad) | + | 12 | angle between the two links on the back right | -Inf | Inf | ankle_4 (right_back_leg) | hinge | angle (rad) | + | 13 | x-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) | + | 14 | y-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) | + | 15 | z-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) | + | 16 | x-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) | + | 17 | y-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) | + | 18 | z-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) | + | 19 | angular velocity of angle between torso and front left link | -Inf | Inf | hip_1 (front_left_leg) | hinge | angle (rad) | + | 20 | angular velocity of the angle between front left links | -Inf | Inf | ankle_1 (front_left_leg) | hinge | angle (rad) | + | 21 | angular velocity of angle between torso and front right link | -Inf | Inf | hip_2 (front_right_leg) | hinge | angle (rad) | + | 22 | angular velocity of the angle between front right links | -Inf | Inf | ankle_2 (front_right_leg) | hinge | angle (rad) | + | 23 | angular velocity of angle between torso and back left link | -Inf | Inf | hip_3 (back_leg) | hinge | angle (rad) | + | 24 | angular velocity of the angle between back left links | -Inf | Inf | ankle_3 (back_leg) | hinge | angle (rad) | + | 25 | angular velocity of angle between torso and back right link | -Inf | Inf | hip_4 (right_back_leg) | hinge | angle (rad) | + | 26 |angular velocity of the angle between back right links | -Inf | Inf | ankle_4 (right_back_leg) | hinge | angle (rad) | + + The remaining 14*6 = 84 elements of the observation are contact forces (external forces - force x, y, z and torque x, y, z) applied to the center of mass of each of the links. 
The 14 links are: the ground link,
+    the torso link, and 3 links for each leg (1 + 1 + 12), each with 6 external force/torque components. These elements are included only if the argument `use_contact_forces` is set to `True` at environment initialization.
+
+    * `desired_goal`: this key represents the final goal to be achieved. In this environment it is a 2-dimensional `ndarray`, `(2,)`, that consists of the two Cartesian coordinates of the desired final ant torso position `[x,y]`. The elements of the array are the following:
+
+    | Num | Observation             | Min    | Max    | Site Name (in corresponding XML file) | Unit         |
+    |-----|-------------------------|--------|--------|---------------------------------------|--------------|
+    | 0   | Final goal x coordinate | -Inf   | Inf    | target                                | position (m) |
+    | 1   | Final goal y coordinate | -Inf   | Inf    | target                                | position (m) |
+
+    * `achieved_goal`: this key represents the current state of the ant's torso, as if it had achieved a goal. This is useful for goal-oriented learning algorithms such as those that use [Hindsight Experience Replay](https://arxiv.org/abs/1707.01495) (HER).
+    The value is an `ndarray` with shape `(2,)`. The elements of the array are the following:
+
+    | Num | Observation                                    | Min    | Max    | Site Name (in corresponding XML file) | Unit         |
+    |-----|------------------------------------------------|--------|--------|---------------------------------------|--------------|
+    | 0   | Current goal ant position in the x coordinate  | -Inf   | Inf    | torso                                 | position (m) |
+    | 1   | Current goal ant position in the y coordinate  | -Inf   | Inf    | torso                                 | position (m) |
+
+    ### Rewards
+
+    The reward can be initialized as `sparse` or `dense`:
+    - *sparse*: the returned reward can have two values: `0` if the ant hasn't reached its final target position, and `1` if the ant is in the final target position (the ant is considered to have reached the goal if the Euclidean distance between both is lower than 0.5 m).
+    - *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.
+
+    To initialize this environment with one of the mentioned reward functions, the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `AntMaze_UMaze-v5`. However, for `dense`
+    reward the id must be modified to `AntMaze_UMazeDense-v5` and the environment initialized as follows:
+
+    ```python
+    import gymnasium as gym
+    import gymnasium_robotics
+
+    gym.register_envs(gymnasium_robotics)
+
+    env = gym.make('AntMaze_UMazeDense-v5')
+    ```
+
+    ### Starting State
+    The goal and initial placement of the ant in the maze follow the same structure for all environments. A discrete cell `(i,j)` is selected for the goal and the agent's initial position as previously mentioned in the **Maze** section.
+    Then this cell index is converted to its cell center as `(x,y)` continuous Cartesian coordinates in the MuJoCo simulation. Finally, noise sampled from a uniform distribution with range `[-0.25,0.25] m` is added to the
+    cell's center x and y coordinates. This creates a richer goal distribution.
+
+    The goal and initial position of the agent can also be specified by the user when the episode is reset. This is done by passing the dictionary argument `options` to the Gymnasium `reset()` function. This dictionary expects one or both of
+    the following keys:
+
+    * `goal_cell`: `numpy.ndarray, shape=(2,), type=int` - Specifies the desired `(i,j)` cell location of the goal. Uniformly sampled noise will be added to the continuous coordinates of the center of the cell.
+    * `reset_cell`: `numpy.ndarray, shape=(2,), type=int` - Specifies the desired `(i,j)` cell location of the agent's initial (reset) position. Uniformly sampled noise will be added to the continuous coordinates of the center of the cell.
+
+    ### Episode End
+    * `truncated` - The episode will be `truncated` when the duration reaches a total of `max_episode_steps`.
+    * `terminated` - The task can be set to be continuing with the `continuing_task` argument. In this case the episode never terminates; instead, when the ant reaches the goal, a new goal location may be generated (see `reset_target`). If the task is set not to be continuing, the
+    episode is terminated when the Euclidean distance to the goal is less than or equal to 0.5.
+
+    ### Arguments
+    * `maze_map` - Optional argument to initialize the environment with a custom maze map.
+    * `continuing_task` - If set to `True`, the episode won't be terminated when reaching the goal; instead, a new goal location will be generated when the goal is reached (if the `reset_target` argument is `True`). If `False`, the environment is terminated when the ant reaches the final goal.
+    * `reset_target` - If set to `True` and the argument `continuing_task` is also `True`, a new goal location will be generated when the ant reaches the current goal. If `False`, the goal location is kept the same when reached.
+    * `xml_file` - Optional path to the MJCF (xml) file of the robot model.
+    * Optionally any other [Gymnasium/MuJoCo/Ant](https://gymnasium.farama.org/environments/mujoco/ant/#arguments/) argument such as `ctrl_cost_weight`.
+
+    Note that the maximum number of timesteps before the episode is `truncated` can be increased or decreased by specifying the `max_episode_steps` argument at initialization. For example,
+    to increase the total number of timesteps to 100 make the environment as follows:
+
+    ```python
+    import gymnasium as gym
+    import gymnasium_robotics
+
+    gym.register_envs(gymnasium_robotics)
+
+    env = gym.make('AntMaze_UMaze-v5', max_episode_steps=100)
+    ```
+
+    ### Version History
+    - v6: No changes thus far.
+    - v5: Now based on `Gymnasium/MuJoCo/Ant-v5`, and inherits all of its features, such as the `xml_file` argument for loading third-party models.
+    - v4: Refactored `compute_terminated` in `MazeEnv` into a pure function `compute_terminated` and a new function `update_goal` which resets the goal position. Ant bug fix: the reward is now computed before reset (i.e. the sparse reward is not always zero). Maze bug fix: the ant can no longer reset within the goal radius 0.45 due to the `maze_size_scaling` factor missing in `MazeEnv`. The `info['success']` key was added.
+    - v3: Refactored version of the D4RL environment; also creates a dependency on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team at DeepMind.
+    - v2 & v1: legacy versions in [D4RL](https://github.com/Farama-Foundation/D4RL).
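+
+    As a short usage sketch tying the arguments above together (the specific argument values here are illustrative, not defaults):
+
+    ```python
+    import gymnasium as gym
+    import gymnasium_robotics
+
+    gym.register_envs(gymnasium_robotics)
+
+    # Episodic variant: the episode terminates once the ant reaches the goal,
+    # and is truncated after at most 300 steps.
+    env = gym.make('AntMaze_UMaze-v5', continuing_task=False, max_episode_steps=300)
+
+    obs, info = env.reset(seed=0)
+    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
+    ```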
+ """ + + metadata = { + "render_modes": [ + "human", + "rgb_array", + "depth_array", + ], + "render_fps": 50, + } + + def __init__( + self, + render_mode: Optional[str] = None, + maze_map: List[List[Union[str, int]]] = U_MAZE, + reward_type: str = "sparse", + continuing_task: bool = True, + reset_target: bool = False, + xml_file: Union[str, None] = None, + **kwargs, + ): + if xml_file is None: + # Get the ant.xml path from the Gymnasium package + ant_xml_file_path = path.join( + path.dirname(sys.modules[AntEnv.__module__].__file__), "assets/ant.xml" + ) + else: + ant_xml_file_path = xml_file + super().__init__( + agent_xml_path=ant_xml_file_path, + maze_map=maze_map, + maze_size_scaling=4, + maze_height=0.5, + reward_type=reward_type, + continuing_task=continuing_task, + reset_target=reset_target, + **kwargs, + ) + # Create the MuJoCo environment, include position observation of the Ant for GoalEnv + self.ant_env = AntEnv( + xml_file=self.tmp_xml_file_path, + exclude_current_positions_from_observation=False, + render_mode=render_mode, + reset_noise_scale=0.0, + **kwargs, + ) + self._model_names = MujocoModelNames(self.ant_env.model) + self.target_site_id = self._model_names.site_name2id["target"] + + self.action_space = self.ant_env.action_space + obs_shape: tuple = self.ant_env.observation_space.shape + self.observation_space = spaces.Dict( + dict( + observation=spaces.Box( + -np.inf, np.inf, shape=(obs_shape[0] - 2,), dtype="float64" + ), + achieved_goal=spaces.Box(-np.inf, np.inf, shape=(2,), dtype="float64"), + desired_goal=spaces.Box(-np.inf, np.inf, shape=(2,), dtype="float64"), + ) + ) + + self.render_mode = render_mode + EzPickle.__init__( + self, + render_mode, + maze_map, + reward_type, + continuing_task, + reset_target, + **kwargs, + ) + + def reset(self, *, seed: Optional[int] = None, **kwargs): + super().reset(seed=seed, **kwargs) + + self.ant_env.init_qpos[:2] = self.reset_pos + + obs, info = self.ant_env.reset(seed=seed) + obs_dict = self._get_obs(obs) + info["success"] = bool( + np.linalg.norm(obs_dict["achieved_goal"] - self.goal) <= 0.45 + ) + + return obs_dict, info + + def step(self, action): + ant_obs, _, _, _, info = self.ant_env.step(action) + obs = self._get_obs(ant_obs) + + reward = self.compute_reward(obs["achieved_goal"], self.goal, info) + terminated = self.compute_terminated(obs["achieved_goal"], self.goal, info) + truncated = self.compute_truncated(obs["achieved_goal"], self.goal, info) + info["success"] = bool(np.linalg.norm(obs["achieved_goal"] - self.goal) <= 0.45) + + if self.render_mode == "human": + self.render() + + # Update the goal position if necessary + self.update_goal(obs["achieved_goal"]) + + return obs, reward, terminated, truncated, info + + def _get_obs(self, ant_obs: np.ndarray) -> Dict[str, np.ndarray]: + achieved_goal = ant_obs[:2] + observation = ant_obs[2:] + + return { + "observation": observation.copy(), + "achieved_goal": achieved_goal.copy(), + "desired_goal": self.goal.copy(), + } + + def update_target_site_pos(self): + self.ant_env.model.site_pos[self.target_site_id] = np.append( + self.goal, self.maze.maze_height / 2 * self.maze.maze_size_scaling + ) + + def render(self): + return self.ant_env.render() + + def close(self): + super().close() + self.ant_env.close() + + @property + def model(self): + return self.ant_env.model + + @property + def data(self): + return self.ant_env.data diff --git a/gymnasium_robotics/envs/maze/maze_v6.py b/gymnasium_robotics/envs/maze/maze_v6.py new file mode 100644 index 00000000..0c2d394e --- 
/dev/null
+++ b/gymnasium_robotics/envs/maze/maze_v6.py
@@ -0,0 +1,422 @@
+"""A maze environment with Gymnasium API for the Gymnasium-Robotics PointMaze environments.
+
+The code is inspired by the D4RL repository hosted on GitHub (https://github.com/Farama-Foundation/D4RL), published in the paper
+'D4RL: Datasets for Deep Data-Driven Reinforcement Learning' by Justin Fu, Aviral Kumar, Ofir Nachum, George Tucker, Sergey Levine.
+
+Original Author of the code: Justin Fu
+
+The modifications made involve organizing the code into different files (`maps.py`, `maze_env.py`, `point_env.py`, and `point_maze_env.py`), as well as adding support for the Gymnasium API.
+
+This project is covered by the Apache 2.0 License.
+"""
+
+import math
+import tempfile
+import time
+import xml.etree.ElementTree as ET
+from os import path
+from typing import Dict, List, Optional, Union
+
+import numpy as np
+
+from gymnasium_robotics.core import GoalEnv
+from gymnasium_robotics.envs.maze.maps import COMBINED, GOAL, RESET, U_MAZE
+
+
+class Maze:
+    r"""This class creates and holds information about the maze in the MuJoCo simulation.
+
+    The accessible attributes are the following:
+    - :attr:`maze_map` - The maze discrete data structure.
+    - :attr:`maze_size_scaling` - The maze scaling for the continuous coordinates in the MuJoCo simulation.
+    - :attr:`maze_height` - The height of the walls in the MuJoCo simulation.
+    - :attr:`unique_goal_locations` - All the `(i,j)` possible cell indices for goal locations.
+    - :attr:`unique_reset_locations` - All the `(i,j)` possible cell indices for agent initialization locations.
+    - :attr:`combined_locations` - All the `(i,j)` possible cell indices for goal and agent initialization locations.
+    - :attr:`map_length` - The number of rows `i` in the maze map.
+    - :attr:`map_width` - The number of columns `j` in the maze map.
+    - :attr:`x_map_center` - The x coordinate of the map's center.
+    - :attr:`y_map_center` - The y coordinate of the map's center.
+
+    The Maze class also presents methods to convert between cell indices and `(x,y)` coordinates in the MuJoCo simulation:
+    - :meth:`cell_rowcol_to_xy` - Convert from `(i,j)` to `(x,y)`.
+    - :meth:`cell_xy_to_rowcol` - Convert from `(x,y)` to `(i,j)`.
+
+    ### Version History
+    * v4: Refactor compute_terminated into a pure function compute_terminated and a new function update_goal which resets the goal position. Bug fix: missing maze_size_scaling factor added in generate_reset_pos() -- only affects AntMaze.
+    * v3: refactor version of the D4RL environment, also create dependency on newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
+    * v2 & v1: legacy versions in the [D4RL](https://github.com/Farama-Foundation/D4RL).
+    """
+
+    def __init__(
+        self,
+        maze_map: List[List[Union[str, int]]],
+        maze_size_scaling: float,
+        maze_height: float,
+    ):
+
+        self._maze_map = maze_map
+        self._maze_size_scaling = maze_size_scaling
+        self._maze_height = maze_height
+
+        self._unique_goal_locations = []
+        self._unique_reset_locations = []
+        self._combined_locations = []
+
+        # Get the center cell Cartesian position of the maze. This will be the origin.
+        self._map_length = len(maze_map)
+        self._map_width = len(maze_map[0])
+        self._x_map_center = self.map_width / 2 * maze_size_scaling
+        self._y_map_center = self.map_length / 2 * maze_size_scaling
+
+    @property
+    def maze_map(self) -> List[List[Union[str, int]]]:
+        """Returns the list[list] data structure of the maze."""
+        return self._maze_map
+
+    @property
+    def maze_size_scaling(self) -> float:
+        """Returns the scaling value used to integrate the maze
+        encoding in the MuJoCo simulation.
+        """
+        return self._maze_size_scaling
+
+    @property
+    def maze_height(self) -> float:
+        """Returns the un-scaled height of the walls in the MuJoCo
+        simulation.
+        """
+        return self._maze_height
+
+    @property
+    def unique_goal_locations(self) -> List[np.ndarray]:
+        """Returns all the possible goal locations in discrete cell
+        coordinates (i,j).
+        """
+        return self._unique_goal_locations
+
+    @property
+    def unique_reset_locations(self) -> List[np.ndarray]:
+        """Returns all the possible reset locations for the agent in
+        discrete cell coordinates (i,j).
+        """
+        return self._unique_reset_locations
+
+    @property
+    def combined_locations(self) -> List[np.ndarray]:
+        """Returns all the possible goal/reset locations in discrete cell
+        coordinates (i,j).
+        """
+        return self._combined_locations
+
+    @property
+    def map_length(self) -> int:
+        """Returns the length of the maze in number of discrete vertical cells
+        or number of rows i.
+        """
+        return self._map_length
+
+    @property
+    def map_width(self) -> int:
+        """Returns the width of the maze in number of discrete horizontal cells
+        or number of columns j.
+        """
+        return self._map_width
+
+    @property
+    def x_map_center(self) -> float:
+        """Returns the x coordinate of the center of the maze in the MuJoCo simulation."""
+        return self._x_map_center
+
+    @property
+    def y_map_center(self) -> float:
+        """Returns the y coordinate of the center of the maze in the MuJoCo simulation."""
+        return self._y_map_center
+
+    def cell_rowcol_to_xy(self, rowcol_pos: np.ndarray) -> np.ndarray:
+        """Converts a cell index `(i,j)` to x and y coordinates in the MuJoCo simulation."""
+        x = (rowcol_pos[1] + 0.5) * self.maze_size_scaling - self.x_map_center
+        y = self.y_map_center - (rowcol_pos[0] + 0.5) * self.maze_size_scaling
+
+        return np.array([x, y])
+
+    def cell_xy_to_rowcol(self, xy_pos: np.ndarray) -> np.ndarray:
+        """Converts x and y coordinates in the MuJoCo simulation to a cell index `(i,j)`."""
+        i = math.floor((self.y_map_center - xy_pos[1]) / self.maze_size_scaling)
+        j = math.floor((xy_pos[0] + self.x_map_center) / self.maze_size_scaling)
+        return np.array([i, j])
+
+    @classmethod
+    def make_maze(
+        cls,
+        agent_xml_path: str,
+        maze_map: list,
+        maze_size_scaling: float,
+        maze_height: float,
+    ):
+        """Class method that returns an instance of Maze with the decoded maze information and the temporary
+        path to the new MJCF (xml) file for the MuJoCo simulation.
+
+        Args:
+            agent_xml_path (str): path to the MJCF (xml) file of the agent model; the maze geometry is added to this model.
+            maze_map (list[list[str,int]]): the discrete maze map encoding walls, empty cells, and reset/goal/combined cells.
+            maze_size_scaling (float): scaling factor applied to the maze cells in the continuous MuJoCo simulation.
+            maze_height (float): un-scaled height of the maze walls.
+
+        Returns:
+            Maze: the Maze instance holding the decoded maze information.
+            str: the path to the temporary MJCF (xml) file of the new model with the included maze.
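+
+        Example (a sketch of how `MazeEnv` below calls this method; the agent XML path is illustrative)::
+
+            maze, tmp_xml_path = Maze.make_maze(
+                "path/to/ant.xml", U_MAZE, maze_size_scaling=4.0, maze_height=0.5
+            )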
+ """ + tree = ET.parse(agent_xml_path) + worldbody = tree.find(".//worldbody") + + maze = cls(maze_map, maze_size_scaling, maze_height) + empty_locations = [] + for i in range(maze.map_length): + for j in range(maze.map_width): + struct = maze_map[i][j] + # Store cell locations in simulation global Cartesian coordinates + x = (j + 0.5) * maze_size_scaling - maze.x_map_center + y = maze.y_map_center - (i + 0.5) * maze_size_scaling + if struct == 1: # Unmovable block. + # Offset all coordinates so that maze is centered. + ET.SubElement( + worldbody, + "geom", + name=f"block_{i}_{j}", + pos=f"{x} {y} {maze_height / 2 * maze_size_scaling}", + size=f"{0.5 * maze_size_scaling} {0.5 * maze_size_scaling} {maze_height / 2 * maze_size_scaling}", + type="box", + material="", + contype="1", + conaffinity="1", + rgba="0.7 0.5 0.3 1.0", + ) + + elif struct == RESET: + maze._unique_reset_locations.append(np.array([x, y])) + elif struct == GOAL: + maze._unique_goal_locations.append(np.array([x, y])) + elif struct == COMBINED: + maze._combined_locations.append(np.array([x, y])) + elif struct == 0: + empty_locations.append(np.array([x, y])) + + # Add target site for visualization + ET.SubElement( + worldbody, + "site", + name="target", + pos=f"0 0 {maze_height / 2 * maze_size_scaling}", + size=f"{0.2 * maze_size_scaling}", + rgba="1 0 0 0.7", + type="sphere", + ) + + # Add the combined cell locations (goal/reset) to goal and reset + if ( + not maze._unique_goal_locations + and not maze._unique_reset_locations + and not maze._combined_locations + ): + # If there are no given "r", "g" or "c" cells in the maze data structure, + # any empty cell can be a reset or goal location at initialization. + maze._combined_locations = empty_locations + elif not maze._unique_reset_locations and not maze._combined_locations: + # If there are no given "r" or "c" cells in the maze data structure, + # any empty cell can be a reset location at initialization. + maze._unique_reset_locations = empty_locations + elif not maze._unique_goal_locations and not maze._combined_locations: + # If there are no given "g" or "c" cells in the maze data structure, + # any empty cell can be a gaol location at initialization. 
+ maze._unique_goal_locations = empty_locations + + maze._unique_goal_locations += maze._combined_locations + maze._unique_reset_locations += maze._combined_locations + + # Save new xml with maze to a temporary file + with tempfile.TemporaryDirectory() as tmp_dir: + temp_xml_name = f"ant_maze{str(time.time())}.xml" + temp_xml_path = path.join(path.dirname(tmp_dir), temp_xml_name) + tree.write(temp_xml_path) + + return maze, temp_xml_path + + +class MazeEnv(GoalEnv): + def __init__( + self, + agent_xml_path: str, + reward_type: str = "dense", + continuing_task: bool = True, + reset_target: bool = True, + maze_map: List[List[Union[int, str]]] = U_MAZE, + maze_size_scaling: float = 1.0, + maze_height: float = 0.5, + position_noise_range: float = 0.25, + **kwargs, + ): + + self.reward_type = reward_type + self.continuing_task = continuing_task + self.reset_target = reset_target + self.maze, self.tmp_xml_file_path = Maze.make_maze( + agent_xml_path, maze_map, maze_size_scaling, maze_height + ) + + self.position_noise_range = position_noise_range + + def generate_target_goal(self) -> np.ndarray: + assert len(self.maze.unique_goal_locations) > 0 + goal_index = self.np_random.integers( + low=0, high=len(self.maze.unique_goal_locations) + ) + goal = self.maze.unique_goal_locations[goal_index].copy() + return goal + + def generate_reset_pos(self) -> np.ndarray: + assert len(self.maze.unique_reset_locations) > 0 + + # While reset position is close to goal position + reset_pos = self.goal.copy() + while ( + np.linalg.norm(reset_pos - self.goal) <= 0.5 * self.maze.maze_size_scaling + ): + reset_index = self.np_random.integers( + low=0, high=len(self.maze.unique_reset_locations) + ) + reset_pos = self.maze.unique_reset_locations[reset_index].copy() + + return reset_pos + + def reset( + self, + *, + seed: Optional[int] = None, + options: Optional[Dict[str, Optional[np.ndarray]]] = None, + ): + """Reset the maze simulation. + + Args: + options (dict[str, np.ndarray]): the options dictionary can contain two items, "goal_cell" and "reset_cell" that will set the initial goal and reset location (i,j) in the self.maze.map list of list maze structure. 
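+
+        A minimal call sketch from a subclass environment such as `AntMazeEnv` (the cell indices are illustrative and must index non-wall cells of the configured maze)::
+
+            import numpy as np
+
+            obs, info = env.reset(
+                options={"goal_cell": np.array([3, 1]), "reset_cell": np.array([1, 1])}
+            )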
+ + """ + super().reset(seed=seed) + + if options is None: + goal = self.generate_target_goal() + # Add noise to goal position + self.goal = self.add_xy_position_noise(goal) + reset_pos = self.generate_reset_pos() + else: + if "goal_cell" in options and options["goal_cell"] is not None: + # assert that goal cell is valid + assert self.maze.map_length > options["goal_cell"][0] + assert self.maze.map_width > options["goal_cell"][1] + assert ( + self.maze.maze_map[options["goal_cell"][0]][options["goal_cell"][1]] + != 1 + ), f"Goal can't be placed in a wall cell, {options['goal_cell']}" + + goal = self.maze.cell_rowcol_to_xy(options["goal_cell"]) + + else: + goal = self.generate_target_goal() + + # Add noise to goal position + self.goal = self.add_xy_position_noise(goal) + + if "reset_cell" in options and options["reset_cell"] is not None: + # assert that goal cell is valid + assert self.maze.map_length > options["reset_cell"][0] + assert self.maze.map_width > options["reset_cell"][1] + assert ( + self.maze.maze_map[options["reset_cell"][0]][ + options["reset_cell"][1] + ] + != 1 + ), f"Reset can't be placed in a wall cell, {options['reset_cell']}" + + reset_pos = self.maze.cell_rowcol_to_xy(options["reset_cell"]) + + else: + reset_pos = self.generate_reset_pos() + + # Update the position of the target site for visualization + self.update_target_site_pos() + # Add noise to reset position + self.reset_pos = self.add_xy_position_noise(reset_pos) + + # Update the position of the target site for visualization + self.update_target_site_pos() + + def add_xy_position_noise(self, xy_pos: np.ndarray) -> np.ndarray: + """Pass an x,y coordinate and it will return the same coordinate with a noise addition + sampled from a uniform distribution + """ + noise_x = ( + self.np_random.uniform( + low=-self.position_noise_range, high=self.position_noise_range + ) + * self.maze.maze_size_scaling + ) + noise_y = ( + self.np_random.uniform( + low=-self.position_noise_range, high=self.position_noise_range + ) + * self.maze.maze_size_scaling + ) + xy_pos[0] += noise_x + xy_pos[1] += noise_y + + return xy_pos + + def compute_reward( + self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info + ) -> float: + distance = np.linalg.norm(achieved_goal - desired_goal, axis=-1) + if self.reward_type == "dense": + return np.exp(-distance) + elif self.reward_type == "sparse": + return (distance <= 0.45).astype(np.float64) + + def compute_terminated( + self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info + ) -> bool: + if not self.continuing_task: + # If task is episodic terminate the episode when the goal is reached + return bool(np.linalg.norm(achieved_goal - desired_goal) <= 0.45) + else: + # Continuing tasks don't terminate, episode will be truncated when time limit is reached (`max_episode_steps`) + return False + + def update_goal(self, achieved_goal: np.ndarray) -> None: + """Update goal position if continuing task and within goal radius.""" + + if ( + self.continuing_task + and self.reset_target + and bool(np.linalg.norm(achieved_goal - self.goal) <= 0.45) + and len(self.maze.unique_goal_locations) > 1 + ): + # Generate a goal while within 0.45 of achieved_goal. 
The distance check above
+            # is not redundant; it avoids calling update_target_site_pos() unless necessary
+            while np.linalg.norm(achieved_goal - self.goal) <= 0.45:
+                # Generate another goal
+                goal = self.generate_target_goal()
+                # Add noise to goal position
+                self.goal = self.add_xy_position_noise(goal)
+
+            # Update the position of the target site for visualization
+            self.update_target_site_pos()
+
+    def compute_truncated(
+        self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info
+    ) -> bool:
+        return False
+
+    def update_target_site_pos(self):
+        """Override this method to update the target site position in the MuJoCo simulation
+        after a new goal is selected. This is mainly for visualization purposes."""
+        raise NotImplementedError