Update in favor of modern numpy and gym/gymnasium versions (#38)

* Switch to new gym API, compatible with latest numpy version
* Update test
* gym to gymnasium
* Update test
EvolutionGym · Jun 13, 2024 · 79de3fe · 79de3fe
1 parent 0919b35
commit 79de3fe
Show file tree

Hide file tree

Showing 15 changed files with 614 additions and 355 deletions.
diff --git a/README.md b/README.md
@@ -70,23 +70,22 @@ python gym_test.py
 Alternatively, you can run the following snippet:
 
 ```python
-import gym
+import gymnasium as gym
 import evogym.envs
 from evogym import sample_robot
 
 
 if __name__ == '__main__':
 
     body, connections = sample_robot((5,5))
-    env = gym.make('Walker-v0', body=body)
+    env = gym.make('Walker-v0', body=body, render_mode='human')
     env.reset()
 
     while True:
         action = env.action_space.sample()-1
-        ob, reward, done, info = env.step(action)
-        env.render()
+        ob, reward, terminated, truncated, info = env.step(action)
 
-        if done:
+        if terminated or truncated:
             env.reset()
 
     env.close()

diff --git a/evogym/envs/__init__.py b/evogym/envs/__init__.py
@@ -10,7 +10,7 @@
 from evogym.envs.traverse import *
 from evogym.envs.walk import *
 
-from gym.envs.registration import register
+from gymnasium.envs.registration import register
 
 ## SIMPLE ##
 register(

diff --git a/evogym/envs/balance.py b/evogym/envs/balance.py
@@ -1,7 +1,7 @@
-import gym
-from gym import error, spaces
-from gym import utils
-from gym.utils import seeding
+import gymnasium as gym
+from gymnasium import error, spaces
+from gymnasium import utils
+from gymnasium.utils import seeding
 
 from evogym import *
 from evogym.envs import BenchmarkBase
@@ -10,24 +10,31 @@
 import math
 import numpy as np
 import os
+from typing import Dict, Any, Optional
 
 class Balance(BenchmarkBase):
 
-    def __init__(self, body, connections=None):
+    def __init__(
+        self,
+        body: np.ndarray,
+        connections: Optional[np.ndarray] = None,
+        render_mode: Optional[str] = None,
+        render_options: Optional[Dict[str, Any]] = None,
+    ):
 
         # make world
         self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Balancer-v0.json'))
         self.world.add_from_array('robot', body, 15, 3, connections=connections)
 
         # init sim
-        BenchmarkBase.__init__(self, self.world)
+        BenchmarkBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options)
 
         # set action space and observation space
         num_actuators = self.get_actuator_indices('robot').size
         num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size
 
-        self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float)
-        self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(1 + num_robot_points,), dtype=np.float)
+        self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float)
+        self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(1 + num_robot_points,), dtype=float)
 
     def get_obs(self, pos_final):
         com_final = np.mean(pos_final, 1)
@@ -71,39 +78,45 @@ def step(self, action):
             print("SIMULATION UNSTABLE... TERMINATING")
             reward -= 3.0
 
-        # observation, reward, has simulation met termination conditions, debugging info
-        return obs, reward, done, {}
+        # observation, reward, has simulation met termination conditions, truncated, debugging info
+        return obs, reward, done, False, {}
 
-    def reset(self):
+    def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]:
 
-        super().reset()
+        super().reset(seed=seed, options=options)
 
         # observation
         obs = np.concatenate((
             self.get_ort_obs("robot"),
             self.get_relative_pos_obs("robot"),
             ))
 
-        return obs
+        return obs, {}
 
 
 class BalanceJump(BenchmarkBase):
 
-    def __init__(self, body, connections=None):
+    def __init__(
+        self,
+        body: np.ndarray,
+        connections: Optional[np.ndarray] = None,
+        render_mode: Optional[str] = None,
+        render_options: Optional[Dict[str, Any]] = None,
+    ):
 
         # make world
         self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Balancer-v1.json'))
         self.world.add_from_array('robot', body, 10, 1, connections=connections)
 
         # init sim
-        BenchmarkBase.__init__(self, self.world)
+        BenchmarkBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options)
 
         # set action space and observation space
         num_actuators = self.get_actuator_indices('robot').size
         num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size
 
-        self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float)
-        self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(1 + num_robot_points,), dtype=np.float)
+        self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float)
+        self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(1 + num_robot_points,), dtype=float)
 
     def get_obs(self, pos_final):
         com_final = np.mean(pos_final, 1)
@@ -148,17 +161,17 @@ def step(self, action):
             print("SIMULATION UNSTABLE... TERMINATING")
             reward -= 3.0
 
-        # observation, reward, has simulation met termination conditions, debugging info
-        return obs, reward, done, {}
+        # observation, reward, has simulation met termination conditions, truncated, debugging info
+        return obs, reward, done, False, {}
 
-    def reset(self):
+    def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]:
 
-        super().reset()
+        super().reset(seed=seed, options=options)
 
         # observation
         obs = np.concatenate((
             self.get_ort_obs("robot"),
             self.get_relative_pos_obs("robot"),
             ))
 
-        return obs
+        return obs, {}
diff --git a/evogym/envs/base.py b/evogym/envs/base.py
@@ -1,10 +1,10 @@
 
-import gym
-from gym import error, spaces
-from gym import utils
-from gym.utils import seeding
+import gymnasium as gym
+from gymnasium import error, spaces
+from gymnasium import utils
+from gymnasium.utils import seeding
 
-from typing import Dict, Optional, List
+from typing import Dict, Optional, List, Any
 from evogym import *
 
 import random
@@ -19,20 +19,34 @@ class EvoGymBase(gym.Env):
 
     Args:
         world (EvoWorld): object specifying the voxel layout of the environment.
+        render_mode (Optional[str]): values of `screen` and `human` will automatically render to a debug window every `step()`. If set to `img` or `rgb_array`, `render()` will return an image array. No rendering by default (default = None)
+        render_options (Optional[Dict[str, Any]]): dictionary of rendering options. See EvoGymBase.render() for details (default = None)
     """
-    def __init__(self, world: EvoWorld) -> None:
+
+    metadata = {'render_modes': ['screen', 'human', 'img', 'rgb_array']}
+
+    def __init__(
+        self,
+        world: EvoWorld,
+        render_mode: Optional[str] = None,
+        render_options: Optional[Dict[str, Any]] = None,
+    ) -> None:
 
         # sim
-        self._sim = EvoSim(self.world)
+        self._sim = EvoSim(world)
         self._default_viewer = EvoViewer(self._sim)
+
+        # render
+        self._render_mode = render_mode
+        self._render_options = render_options
 
     def step(self, action: Dict[str, np.ndarray]) -> bool:
         """
-        Step the environment by running physcis computations.
+        Step the environment by running physics computations.
 
         Args:
             action (Dict[str, np.ndarray]): dictionary mapping robot names to actions. Actions are `(n,)` arrays, where `n` is the number of actuators in the target robot.
-        
+            
         Returns:
             bool: whether or not the simulation has reached an unstable state and cannot be recovered (`True` = unstable).
         """
@@ -42,10 +56,13 @@ def step(self, action: Dict[str, np.ndarray]) -> bool:
             a[abs(a) < 1e-8] = 0
             self._sim.set_action(robot_name, a)
         done = self._sim.step()
+
+        if self._render_mode == 'human' or self._render_mode == 'screen':
+            self.render()
 
         return done
 
-    def reset(self,) -> None:
+    def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> None:
         """
         Reset the simulation to the initial state.
         """
@@ -71,27 +88,31 @@ def default_viewer(self,) -> EvoViewer:
         """
         return self._default_viewer
 
-    def render(self,
-               mode: str ='screen',
-               verbose: bool = False,
-               hide_background: bool = False,
-               hide_grid: bool = False,
-               hide_edges: bool = False,
-               hide_voxels: bool = False) -> Optional[np.ndarray]:
-        """
-        Render the simulation.
-
-        Args:
-            mode (str): values of 'screen' and 'human' will render to a debug window. If set to 'img' will return an image array.
-            verbose (bool): whether or not to print the rendering speed (rps) every second.
-            hide_background (bool): whether or not to render the cream-colored background. If shut off background will be white.
-            hide_grid (bool): whether or not to render the grid.
-            hide_edges (bool): whether or not to render edges around all objects.
-            hide_voxels (bool): whether or not to render voxels.
-
+    def render(
+        self,
+    ) -> Optional[np.ndarray]:
+        """
+        Render the simulation according to the `render_mode` and `render_options` specified at initialization.
+        The following rendering options are available as key-value pairs in the `render_options` dictionary:
+        - `verbose` (bool): whether or not to print the rendering speed (rps) every second. (default = False)
+        - `hide_background` (bool): whether or not to hide the cream-colored background. If hidden, the background will be white. (default = False)
+        - `hide_grid` (bool): whether or not to hide the grid. (default = False)
+        - `hide_edges` (bool): whether or not to hide the edges around all objects. (default = False)
+        - `hide_voxels` (bool): whether or not to hide the voxels. (default = False)
+        
         Returns:
-            Optional[np.ndarray]: if `mode` is set to `img`, will return an image array.
+            Optional[np.ndarray]: if `render_mode` is set to `img` or `rgb_array`, will return an image array. Otherwise, will return `None`.
         """
+        mode, render_options = self._render_mode, {} if self._render_options is None else self._render_options
+        if mode is None:
+            return None
+
+        verbose = render_options.get('verbose', False)
+        hide_background = render_options.get('hide_background', False)
+        hide_grid = render_options.get('hide_grid', False)
+        hide_edges = render_options.get('hide_edges', False)
+        hide_voxels = render_options.get('hide_voxels', False)
+
         return self.default_viewer.render(mode, verbose, hide_background, hide_grid, hide_edges, hide_voxels)
 
     def close(self) -> None:
@@ -360,9 +381,14 @@ class BenchmarkBase(EvoGymBase):
     DATA_PATH = pkg_resources.resource_filename('evogym.envs', os.path.join('sim_files'))
     VOXEL_SIZE = 0.1
 
-    def __init__(self, world):
+    def __init__(
+        self,
+        world: EvoWorld,
+        render_mode: Optional[str] = None,
+        render_options: Optional[Dict[str, Any]] = None,
+    ):
 
-        EvoGymBase.__init__(self, world)
+        EvoGymBase.__init__(self, world=world, render_mode=render_mode, render_options=render_options)
         self.default_viewer.track_objects('robot')
 
     def step(self, action):