diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 5719c5e6..0c43ce4e 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,6 +1,6 @@
-name: Testing
+name: Test

-on: [push, pull_request]
+on: [push]

 permissions:
   contents: read
@@ -10,16 +10,17 @@ concurrency:
   cancel-in-progress: true

 jobs:
-  build:
+  build_and_test:
     strategy:
+      fail-fast: false
       matrix:
         os:
-          - "ubuntu-22.04"
           - "ubuntu-latest"
         python:
           - "3.7"
           - "3.8"
           - "3.9"
+          - "3.10"
     runs-on: ubuntu-22.04
     steps:
       - uses: actions/checkout@v3
@@ -34,13 +35,14 @@ jobs:
       - name: Install deb dependencies
         run: sudo apt-get install -y xorg-dev libglu1-mesa-dev libglew-dev xvfb
       - name: Install python dependencies
-        run: pip install -r requirements.txt
+        run: pip install -r requirements-dev.txt
       - name: Cache python wheel packages
         uses: actions/cache@v3
         with:
           path: ~/.cache/pip
-          key: wheel-cache-${{ hashFiles('requirements.txt') }}
+          key: wheel-cache-${{ hashFiles('requirements-dev.txt') }}
       - name: Install evolution gym
         run: pip install -e .
       - name: Run test
-        run: xvfb-run python -m unittest tests/test_render.py
+        run: xvfb-run python -m pytest -s -v -n auto -m lite
+        working-directory: tests
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
new file mode 100644
index 00000000..32317813
--- /dev/null
+++ b/.github/workflows/wheels.yml
@@ -0,0 +1,58 @@
+name: Build
+
+on: [push]
+
+jobs:
+  build_wheels:
+    name: Build wheels on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          # Windows 64 bit
+          - os: windows-latest
+            platform_id: win_amd64
+
+          # Linux 64 bit
+          - os: ubuntu-latest
+            platform_id: manylinux_x86_64
+            manylinux_image: manylinux_2_28
+
+            # manylinux2014 doesn't work due to an issue in GLEW:
+            # https://github.com/glfw/glfw/issues/2139
+            # manylinux_image: manylinux2014
+
+          # MacOS x86_64
+          - os: macos-12
+            platform_id: macosx_x86_64
+
+          # MacOS arm64
+          - os: macos-14
+            platform_id: macosx_arm64
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      # Used to host cibuildwheel
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.8"
+
+      - name: Install cibuildwheel
+        run: python -m pip install cibuildwheel==2.19.0
+
+      # Build wheels
+      - name: Build wheels
+        run: python -m cibuildwheel --output-dir wheelhouse
+        # to supply options, put them in 'env', like:
+        # env:
+        #   CIBW_SOME_OPTION: value
+
+      # Upload wheels
+      - uses: actions/upload-artifact@v4
+        with:
+          name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
+          path: ./wheelhouse/*.whl
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 2b99c80e..d66759ca 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,4 @@ build_scripts
 .DS_Store
 exported/
 req_ns.txt
+backup/
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..856a9285
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+graft evogym/simulator
diff --git a/README.md b/README.md
index f491045f..b9c5b68a 100644
--- a/README.md
+++ b/README.md
@@ -1,27 +1,52 @@
 # Evolution Gym

-A large-scale benchmark for co-optimizing the design and control of soft robots. As seen in [Evolution Gym: A Large-Scale Benchmark for Evolving Soft Robots](https://evolutiongym.github.io/) (**NeurIPS 2021**).
+![example workflow](https://github.com/EvolutionGym/evogym/actions/workflows/wheels.yml/badge.svg)
+![example workflow](https://github.com/EvolutionGym/evogym/actions/workflows/test.yml/badge.svg)

-[//]: # (teaser)
-![teaser](images/teaser.gif)
+Evolution Gym is a large-scale benchmark for co-optimizing the design and control of soft robots. It provides a lightweight soft-body simulator wrapped with a gym-like interface for developing learning algorithms. EvoGym also includes a suite of 32 locomotion and manipulation tasks, detailed on our [website](https://evolutiongym.github.io/all-tasks). Task suite evaluations are described in our [NeurIPS 2021 paper](https://arxiv.org/pdf/2201.09863).
+
+> [!NOTE]
+> EvoGym has been recently updated! TLDR: requirements have been modernized (gym/gymnasium, numpy, etc.), and the library is now pip-installable.
+
+[//]: # (teaser)
+![teaser](https://github.com/EvolutionGym/evogym/blob/main/images/teaser.gif)

 # Installation

-Clone the repo and submodules:
+EvoGym supports python `3.7` to `3.10` on most operating systems:

 ```shell
-git clone --recurse-submodules https://github.com/EvolutionGym/evogym.git
+pip install evogym --upgrade
+```
+
+> [!CAUTION]
+> This doesn't work yet -- coming soon! For now, you can install from test PyPI:
+> ```shell
+> pip install "numpy<2.0.0" gymnasium
+> pip install -i https://test.pypi.org/simple/ evogym
+> ```
+
+On **Linux** install the following packages (or equivalent):
+
+```shell
+sudo apt-get install xorg-dev libglu1-mesa-dev
 ```

+## From Source
+
+If your platform is not supported, you may alternatively build from source:
+
 ### Requirements
-* Python 3.7/3.8
-* Linux, macOS, or Windows with [Visual Studios 2017](https://visualstudio.microsoft.com/vs/older-downloads/)
-* [OpenGL](https://www.opengl.org//)
+* Python 3
+* Linux, macOS, or Windows with [Visual Studio 2017](https://visualstudio.microsoft.com/vs/older-downloads/) build tools.
 * [CMake](https://cmake.org/download/)
-* [PyTorch](http://pytorch.org/)
-
+Clone the repo and submodules:
+
+```shell
+git clone --recurse-submodules https://github.com/EvolutionGym/evogym.git
+```

 On **Linux only**:
@@ -29,63 +54,143 @@ On **Linux only**:
 sudo apt-get install xorg-dev libglu1-mesa-dev
 ```

-Either install Python dependencies with conda:
+Finally, to install `evogym`, run the following in the environment of your choice:

 ```shell
-conda env create -f environment.yml
-conda activate evogym
+pip install -e .
 ```

-or with pip:
+## Test Installation
+
+If you have the repo cloned, `cd` to the `examples` folder and run the following script:

 ```shell
-pip install -r requirements.txt
+python gym_test.py
 ```

-### Build and Install Package
+Alternatively, you can run the following snippet:

-To build the C++ simulation, build all the submodules, and install `evogym` run the following command:
+```python
+import gymnasium as gym
+import evogym.envs
+from evogym import sample_robot

-```shell
-python setup.py install
-```

-### Test Installation
+if __name__ == '__main__':

-cd to the `examples` folder and run the following script:
+    body, connections = sample_robot((5,5))
+    env = gym.make('Walker-v0', body=body, render_mode='human')
+    env.reset()

-```shell
-python gym_test.py
+    while True:
+        action = env.action_space.sample()
+        ob, reward, terminated, truncated, info = env.step(action)
+
+        if terminated or truncated:
+            env.reset()
+
+    env.close()
 ```

 This script creates a random `5x5` robot in the `Walking-v0` environment. The robot is taking random actions.
 A window should open with a visualization of the environment -- kill the process from the terminal to close it.
-
+Error message: `libGL error: MESA-LOADER: failed to open iris: /usr/lib/dri/iris_dri.so`
-# Usage
+Fix: `conda install -c conda-forge libstdcxx-ng`
-## Examples
+
+
+# Usage

-To see example usage as well as to run co-design and control optimization experiments in EvoGym, please see the `examples` folder and its `README`.
+In addition to the resources below, you can find API documentation on our [website](https://evolutiongym.github.io/documentation).

 ## Tutorials

-You can find tutorials for getting started with the codebase on our [website](https://evolutiongym.github.io/tutorials). Completed code from all tutorials is also available in the `tutorials` folder.
+You can find tutorials for getting started with the codebase on our [website](https://evolutiongym.github.io/tutorials). Completed code from all tutorials is also available in the `tutorials` folder, along with a `README`. Tutorials are included for:
+- Using the [evogym API](https://evolutiongym.github.io/tutorials/basic-api.html)
+- Making a [custom evogym environment](https://evolutiongym.github.io/tutorials/new-env.html)
+- Supported [rendering options](https://github.com/EvolutionGym/evogym/blob/main/tutorials/rendering_options.py)
+
+## Examples
+
+To run co-design and control optimization experiments in EvoGym, please see the `examples` folder and its `README`. Included are scripts for:
+- Running PPO
+- Running a Genetic Algorithm
+- Running Bayesian Optimization
+- Running CPPN-NEAT
+- Visualizing results
+- Saving results as gifs

-## Docs
+Make sure you clone the repo with submodules:

-You can find documentation on our [website](https://evolutiongym.github.io/documentation).
+```shell
+git clone --recurse-submodules https://github.com/EvolutionGym/evogym.git
+```
+
+Install the necessary python requirements:
+```shell
+pip install -r requirements.txt
+```

 ## Design Tool

-For instructions on how to use the Evolution Gym Design Tool, please see [this repo](https://github.com/EvolutionGym/evogym-design-tool).
+The Design Tool provides a GUI for creating Evolution Gym environments. Please see [this repo](https://github.com/EvolutionGym/evogym-design-tool).
+
+[//]: # (teaser)
+![teaser](images/design-tool.gif)
+
+## Headless Mode
+
+EvoGym runs in headless mode by default, without initializing libraries used for rendering.
+These libraries are initialized on user request. If using a server without rendering capabilities, ensure that:
+
+```python
+# Envs are created with render_mode=None (None by default)
+env = gym.make('Walker-v0', body=body, render_mode=None)
+```
+
+```python
+# If using the low-level api, do not call EvoViewer.render()
+world = EvoWorld.from_json(os.path.join('world_data', 'simple_environment.json'))
+sim = EvoSim(world)
+viewer = EvoViewer(sim)
+viewer.render('img') # <-- Rendering libraries are initialized; do not call this
+```
+
+# Dev
+
+Install the repo with submodules:
+
+```shell
+git clone --recurse-submodules https://github.com/EvolutionGym/evogym.git
+```
+
+Install the necessary python requirements.
You will additionally need to install the dev requirements: +```shell +pip install -r requirements.txt +pip install -r requirements-dev.txt +``` + +## Run Tests + +From within the `tests` directory run the full test suite: + +```shell +cd tests +pytest -s -v -n auto +``` + +Or the lite test suite: + + +```shell +cd tests +pytest -s -v -n auto -m lite +``` # Citation diff --git a/evogym/envs/__init__.py b/evogym/envs/__init__.py index 6484d8c6..49e7efd3 100644 --- a/evogym/envs/__init__.py +++ b/evogym/envs/__init__.py @@ -10,7 +10,7 @@ from evogym.envs.traverse import * from evogym.envs.walk import * -from gym.envs.registration import register +from gymnasium.envs.registration import register ## SIMPLE ## register( diff --git a/evogym/envs/balance.py b/evogym/envs/balance.py index 61a8b6b7..8589fe35 100644 --- a/evogym/envs/balance.py +++ b/evogym/envs/balance.py @@ -1,7 +1,7 @@ -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding from evogym import * from evogym.envs import BenchmarkBase @@ -10,24 +10,31 @@ import math import numpy as np import os +from typing import Dict, Any, Optional class Balance(BenchmarkBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Balancer-v0.json')) self.world.add_from_array('robot', body, 15, 3, connections=connections) # init sim - BenchmarkBase.__init__(self, self.world) + BenchmarkBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(1 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(1 + num_robot_points,), dtype=float) def get_obs(self, pos_final): com_final = np.mean(pos_final, 1) @@ -71,12 +78,12 @@ def step(self, action): print("SIMULATION UNSTABLE... 
TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( @@ -84,26 +91,32 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} class BalanceJump(BenchmarkBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Balancer-v1.json')) self.world.add_from_array('robot', body, 10, 1, connections=connections) # init sim - BenchmarkBase.__init__(self, self.world) + BenchmarkBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(1 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(1 + num_robot_points,), dtype=float) def get_obs(self, pos_final): com_final = np.mean(pos_final, 1) @@ -148,12 +161,12 @@ def step(self, action): print("SIMULATION UNSTABLE... TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( @@ -161,4 +174,4 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs \ No newline at end of file + return obs, {} \ No newline at end of file diff --git a/evogym/envs/base.py b/evogym/envs/base.py index 7d007b18..82022ce4 100644 --- a/evogym/envs/base.py +++ b/evogym/envs/base.py @@ -1,10 +1,10 @@ -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding -from typing import Dict, Optional, List +from typing import Dict, Optional, List, Any from evogym import * import random @@ -19,20 +19,34 @@ class EvoGymBase(gym.Env): Args: world (EvoWorld): object specifying the voxel layout of the environment. + render_mode (Optional[str]): values of `screen` and `human` will automatically render to a debug window every `step()`. If set to `img` or `rgb_array`, `render()` will return an image array. No rendering by default (default = None) + render_options (Optional[Dict[str, Any]]): dictionary of rendering options. 
See EvoGymBase.render() for details (default = None) """ - def __init__(self, world: EvoWorld) -> None: + + metadata = {'render_modes': ['screen', 'human', 'img', 'rgb_array']} + + def __init__( + self, + world: EvoWorld, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ) -> None: # sim - self._sim = EvoSim(self.world) + self._sim = EvoSim(world) self._default_viewer = EvoViewer(self._sim) + + # render + self._render_mode = render_mode + self._render_options = render_options def step(self, action: Dict[str, np.ndarray]) -> bool: """ - Step the environment by running physcis computations. + Step the environment by running physics computations. Args: action (Dict[str, np.ndarray]): dictionary mapping robot names to actions. Actions are `(n,)` arrays, where `n` is the number of actuators in the target robot. - + Returns: bool: whether or not the simulation has reached an unstable state and cannot be recovered (`True` = unstable). """ @@ -42,10 +56,13 @@ def step(self, action: Dict[str, np.ndarray]) -> bool: a[abs(a) < 1e-8] = 0 self._sim.set_action(robot_name, a) done = self._sim.step() + + if self._render_mode == 'human' or self._render_mode == 'screen': + self.render() return done - def reset(self,) -> None: + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> None: """ Reset the simulation to the initial state. """ @@ -71,34 +88,40 @@ def default_viewer(self,) -> EvoViewer: """ return self._default_viewer - def render(self, - mode: str ='screen', - verbose: bool = False, - hide_background: bool = False, - hide_grid: bool = False, - hide_edges: bool = False, - hide_voxels: bool = False) -> Optional[np.ndarray]: - """ - Render the simulation. - - Args: - mode (str): values of 'screen' and 'human' will render to a debug window. If set to 'img' will return an image array. - verbose (bool): whether or not to print the rendering speed (rps) every second. - hide_background (bool): whether or not to render the cream-colored background. If shut off background will be white. - hide_grid (bool): whether or not to render the grid. - hide_edges (bool): whether or not to render edges around all objects. - hide_voxels (bool): whether or not to render voxels. - + def render( + self, + ) -> Optional[np.ndarray]: + """ + Render the simulation according to the `render_mode` and `render_options` specified at initialization. + The following rendering options are available as key-value pairs in the `render_options` dictionary: + - `verbose` (bool): whether or not to print the rendering speed (rps) every second. (default = False) + - `hide_background` (bool): whether or not to render the cream-colored background. If shut off background will be white. (default = False) + - `hide_grid` (bool): whether or not to render the grid. (default = False) + - `hide_edges` (bool): whether or not to render edges around all objects. (default = False) + - `hide_voxels` (bool): whether or not to render voxels. (default = False) + Returns: - Optional[np.ndarray]: if `mode` is set to `img`, will return an image array. + Optional[np.ndarray]: if `mode` is set to `img` or `rgb_array`, will return an image array. Otherwise, will return `None`. 
""" + mode, render_options = self._render_mode, {} if self._render_options is None else self._render_options + if mode is None: + return None + + verbose = render_options.get('verbose', False) + hide_background = render_options.get('hide_background', False) + hide_grid = render_options.get('hide_grid', False) + hide_edges = render_options.get('hide_edges', False) + hide_voxels = render_options.get('hide_voxels', False) + return self.default_viewer.render(mode, verbose, hide_background, hide_grid, hide_edges, hide_voxels) def close(self) -> None: """ Close the simulation. """ - self.default_viewer.hide_debug_window() + self.default_viewer.close() + del self._default_viewer + del self._sim def get_actuator_indices(self, robot_name: str) -> np.ndarray: """ @@ -360,19 +383,15 @@ class BenchmarkBase(EvoGymBase): DATA_PATH = pkg_resources.resource_filename('evogym.envs', os.path.join('sim_files')) VOXEL_SIZE = 0.1 - def __init__(self, world): + def __init__( + self, + world: EvoWorld, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): - EvoGymBase.__init__(self, world) + EvoGymBase.__init__(self, world=world, render_mode=render_mode, render_options=render_options) self.default_viewer.track_objects('robot') - - def step(self, action): - - action_copy = {} - - for robot_name, a in action.items(): - action_copy[robot_name] = a + 1 - - return super().step(action_copy) def pos_at_time(self, time): return super().pos_at_time(time)*self.VOXEL_SIZE diff --git a/evogym/envs/change_shape.py b/evogym/envs/change_shape.py index c782edc6..14b98872 100644 --- a/evogym/envs/change_shape.py +++ b/evogym/envs/change_shape.py @@ -1,7 +1,7 @@ -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding from evogym import * from evogym.envs import BenchmarkBase @@ -10,22 +10,29 @@ from math import * import numpy as np import os +from typing import Dict, Any, Optional class ShapeBase(BenchmarkBase): - def __init__(self, world): - super().__init__(world) + def __init__( + self, + world: EvoWorld, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): - def reset(self): + super().__init__(world=world, render_mode=render_mode, render_options=render_options) + + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} ### ---------------------------------------------------------------------- @@ -77,21 +84,27 @@ def convex_poly_area(self, pts_cw): class MaximizeShape(ShapeBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'ShapeChange.json')) self.world.add_from_array('robot', body, 7, 1, connections=connections) # init sim - ShapeBase.__init__(self, self.world) + ShapeBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = 
self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(num_robot_points,), dtype=float) def step(self, action): @@ -117,8 +130,8 @@ def step(self, action): print("SIMULATION UNSTABLE... TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} def get_reward(self, robot_pos_init, robot_pos_final): @@ -136,21 +149,27 @@ def get_reward(self, robot_pos_init, robot_pos_final): class MinimizeShape(ShapeBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'ShapeChange.json')) self.world.add_from_array('robot', body, 7, 1, connections=connections) # init sim - ShapeBase.__init__(self, self.world) + ShapeBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(num_robot_points,), dtype=float) def step(self, action): @@ -176,8 +195,8 @@ def step(self, action): print("SIMULATION UNSTABLE... 
TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} def get_reward(self, robot_pos_init, robot_pos_final): @@ -195,21 +214,27 @@ def get_reward(self, robot_pos_init, robot_pos_final): class MaximizeXShape(ShapeBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'ShapeChange.json')) self.world.add_from_array('robot', body, 7, 1, connections=connections) # init sim - ShapeBase.__init__(self, self.world) + ShapeBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(num_robot_points,), dtype=float) def step(self, action): @@ -235,8 +260,8 @@ def step(self, action): print("SIMULATION UNSTABLE... TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} def get_reward(self, robot_pos_init, robot_pos_final): @@ -255,21 +280,27 @@ def get_reward(self, robot_pos_init, robot_pos_final): class MaximizeYShape(ShapeBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'ShapeChange.json')) self.world.add_from_array('robot', body, 7, 1, connections=connections) # init sim - ShapeBase.__init__(self, self.world) + ShapeBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(num_robot_points,), dtype=float) def step(self, action): @@ -295,8 +326,8 @@ def step(self, action): print("SIMULATION UNSTABLE... 
TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} def get_reward(self, robot_pos_init, robot_pos_final): diff --git a/evogym/envs/climb.py b/evogym/envs/climb.py index 72377f9c..a71eb7d3 100644 --- a/evogym/envs/climb.py +++ b/evogym/envs/climb.py @@ -1,7 +1,7 @@ -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding from evogym import * from evogym.envs import BenchmarkBase @@ -10,16 +10,22 @@ import math import numpy as np import os +from typing import Dict, Any, Optional class ClimbBase(BenchmarkBase): - def __init__(self, world): - - super().__init__(world) + def __init__( + self, + world: EvoWorld, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): + + super().__init__(world=world, render_mode=render_mode, render_options=render_options) - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( @@ -27,26 +33,32 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} class Climb0(ClimbBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Climber-v0.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - ClimbBase.__init__(self, self.world) + ClimbBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points,), dtype=float) def step(self, action): @@ -79,26 +91,32 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class Climb1(ClimbBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Climber-v1.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - ClimbBase.__init__(self, self.world) + ClimbBase.__init__(self, world=self.world, render_mode=render_mode, 
render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points,), dtype=float) def step(self, action): @@ -131,27 +149,33 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class Climb2(ClimbBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Climber-v2.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - ClimbBase.__init__(self, self.world) + ClimbBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 3 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) def step(self, action): @@ -182,12 +206,12 @@ def step(self, action): print("SIMULATION UNSTABLE... 
TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( @@ -197,4 +221,4 @@ def reset(self): self.get_ceil_obs("robot", ["pipe"], self.sight_dist), )) - return obs \ No newline at end of file + return obs, {} \ No newline at end of file diff --git a/evogym/envs/flip.py b/evogym/envs/flip.py index 994e35db..d529d1e6 100644 --- a/evogym/envs/flip.py +++ b/evogym/envs/flip.py @@ -1,7 +1,7 @@ -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding from evogym import * from evogym.envs import BenchmarkBase @@ -10,25 +10,32 @@ import math import numpy as np import os +from typing import Dict, Any, Optional class Flipping(BenchmarkBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Flipper-v0.json')) self.world.add_from_array('robot', body, 60, 1, connections=connections) # init sim - BenchmarkBase.__init__(self, self.world) + BenchmarkBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(1 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(1 + num_robot_points,), dtype=float) # reward self.num_flips = 0 @@ -78,12 +85,12 @@ def step(self, action): done = True - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) self.num_flips = 0 @@ -93,4 +100,4 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs \ No newline at end of file + return obs, {} \ No newline at end of file diff --git a/evogym/envs/jump.py b/evogym/envs/jump.py index 0fab281e..ac528cc4 100644 --- a/evogym/envs/jump.py +++ b/evogym/envs/jump.py @@ -1,7 +1,7 @@ -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding from evogym import * from evogym.envs import BenchmarkBase @@ 
-10,25 +10,32 @@ import math import numpy as np import os +from typing import Dict, Any, Optional class StationaryJump(BenchmarkBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Jumper-v0.json')) self.world.add_from_array('robot', body, 32, 1, connections=connections) # init sim - BenchmarkBase.__init__(self, self.world) + BenchmarkBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 2 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points + (self.sight_dist*2 +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points + (self.sight_dist*2 +1),), dtype=float) def step(self, action): @@ -58,12 +65,12 @@ def step(self, action): print("SIMULATION UNSTABLE... TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( @@ -72,4 +79,4 @@ def reset(self): self.get_floor_obs("robot", ["ground"], self.sight_dist), )) - return obs + return obs, {} diff --git a/evogym/envs/manipulate.py b/evogym/envs/manipulate.py index 083d3c5d..1c7b257c 100644 --- a/evogym/envs/manipulate.py +++ b/evogym/envs/manipulate.py @@ -1,8 +1,8 @@ from evogym.envs.base import EvoGymBase -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding from evogym import * from evogym.envs import BenchmarkBase @@ -11,15 +11,22 @@ import math import numpy as np import os +from typing import Dict, Any, Optional class PackageBase(BenchmarkBase): - def __init__(self, world): - super().__init__(world) + def __init__( + self, + world: EvoWorld, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): + + super().__init__(world=world, render_mode=render_mode, render_options=render_options) self.default_viewer.track_objects('robot', 'package') - def get_obs(self, robot_pos_final, robot_vel_final, package_pos_final, package_vel_final): + def get_obs(self, robot_pos_final, robot_vel_final, package_pos_final, package_vel_final) -> np.ndarray: robot_com_pos = np.mean(robot_pos_final, axis=1) robot_com_vel = np.mean(robot_vel_final, axis=1) @@ -55,9 +62,9 @@ def get_reward(self, package_pos_init, package_pos_final, robot_pos_init, robot_ return reward - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = 
None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation robot_pos_final = self.object_pos_at_time(self.get_time(), "robot") @@ -71,26 +78,32 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} class CarrySmallRect(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Carrier-v0.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=float) # threshhold height self.thresh_height = 3.0*self.VOXEL_SIZE @@ -149,26 +162,32 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class CarrySmallRectToTable(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Carrier-v1.json')) self.world.add_from_array('robot', body, 1, 4, connections=connections) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=float) # threshhold height self.thresh_height = 6.0*self.VOXEL_SIZE @@ -225,26 +244,32 @@ def step(self, action): print("SIMULATION UNSTABLE... 
TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class PushSmallRect(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Pusher-v0.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=float) # threshhold height self.thresh_height = 0.0*self.VOXEL_SIZE @@ -285,26 +310,32 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class PushSmallRectOnOppositeSide(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Pusher-v1.json')) self.world.add_from_array('robot', body, 13, 1, connections=connections) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=float) # threshhold height self.thresh_height = 0.0*self.VOXEL_SIZE @@ -345,26 +376,32 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class ThrowSmallRect(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + 
render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Thrower-v0.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=float) # threshhold height self.thresh_height = 0.0*self.VOXEL_SIZE @@ -415,15 +452,24 @@ def step(self, action): print("SIMULATION UNSTABLE... TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class CatchSmallRect(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): self.robot_body = body self.robot_connections = connections + + self.render_mode = render_mode + self.render_options = render_options self.random_init() @@ -452,7 +498,7 @@ def random_init(self): self.world.add_object(peg2) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, render_mode=self.render_mode, render_options=self.render_options) self.default_viewer.track_objects('robot', 'package') @@ -460,13 +506,13 @@ def random_init(self): num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(7 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(7 + num_robot_points,), dtype=float) # threshhold height self.thresh_height = 5.0*self.VOXEL_SIZE - def get_obs_catch(self, robot_pos_final, package_pos_final): + def get_obs_catch(self, robot_pos_final, package_pos_final) -> np.ndarray: robot_com_pos = np.mean(robot_pos_final, axis=1) package_com_pos = np.mean(package_pos_final, axis=1) @@ -525,12 +571,12 @@ def step(self, action): print("SIMULATION UNSTABLE... 
TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - EvoGymBase.reset(self) + EvoGymBase.reset(self, seed=seed, options=options) self.default_viewer.hide_debug_window() self.random_init() @@ -558,25 +604,31 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} class ToppleBeam(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'BeamToppler-v0.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(7 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(7 + num_robot_points,), dtype=float) # threshhold height self.thresh_height = 0.0*self.VOXEL_SIZE @@ -651,12 +703,12 @@ def step(self, action): reward += 1.0 done = True - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - EvoGymBase.reset(self) + EvoGymBase.reset(self, seed=seed, options=options) # observation robot_pos_final = self.object_pos_at_time(self.get_time(), "robot") @@ -671,25 +723,31 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} class SlideBeam(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'BeamSlider-v0.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(7 + num_robot_points,), 
dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(7 + num_robot_points,), dtype=float) # threshhold height self.thresh_height = 0.0*self.VOXEL_SIZE @@ -760,12 +818,12 @@ def step(self, action): reward += 1.0 done = True - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - EvoGymBase.reset(self) + EvoGymBase.reset(self, seed=seed, options=options) # observation robot_pos_final = self.object_pos_at_time(self.get_time(), "robot") @@ -780,25 +838,31 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} class LiftSmallRect(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Lifter-v0.json')) self.world.add_from_array('robot', body, 2, 3, connections=connections) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(7 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(7 + num_robot_points,), dtype=float) def get_reward_lift(self, robot_pos_init, robot_pos_final, package_pos_init, package_pos_final): @@ -852,12 +916,12 @@ def step(self, action): print("SIMULATION UNSTABLE... 
TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - EvoGymBase.reset(self) + EvoGymBase.reset(self, seed=seed, options=options) # observation robot_pos_final = self.object_pos_at_time(self.get_time(), "robot") @@ -872,5 +936,5 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} \ No newline at end of file diff --git a/evogym/envs/multi_goal.py b/evogym/envs/multi_goal.py index 9be0e38b..2243d68b 100644 --- a/evogym/envs/multi_goal.py +++ b/evogym/envs/multi_goal.py @@ -1,7 +1,7 @@ -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding from evogym import * from evogym.envs import BenchmarkBase @@ -10,6 +10,7 @@ import math import numpy as np import os +from typing import Dict, Any, Optional class Goal(): def __init__(self, name, requirements = None): @@ -20,8 +21,14 @@ def evaluate_reward(self, args): class GoalBase(BenchmarkBase): - def __init__(self, world): - super().__init__(world) + def __init__( + self, + world: EvoWorld, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): + + super().__init__(world=world, render_mode=render_mode, render_options=render_options) def init_reward_goals(self, goals): @@ -90,21 +97,27 @@ def get_obs(self, args): class BiWalk(GoalBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'BidirectionalWalker-v0.json')) self.world.add_from_array('robot', body, 33, 1, connections=connections) # init sim - GoalBase.__init__(self, self.world) + GoalBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(5 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(5 + num_robot_points,), dtype=float) self.set_random_goals(20, 50, 100) @@ -171,12 +184,12 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) self.current_goal = 0 self.set_random_goals(20, 50, 100) @@ 
-196,4 +209,4 @@ def reset(self): }) )) - return obs + return obs, {} diff --git a/evogym/envs/traverse.py b/evogym/envs/traverse.py index faeb8bec..1cfcbdb9 100644 --- a/evogym/envs/traverse.py +++ b/evogym/envs/traverse.py @@ -1,8 +1,8 @@ from evogym.envs.base import EvoGymBase -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding from evogym import * from evogym.envs import BenchmarkBase @@ -11,11 +11,18 @@ import math import numpy as np import os +from typing import Dict, Any, Optional class StairsBase(BenchmarkBase): - def __init__(self, world): - super().__init__(world) + def __init__( + self, + world: EvoWorld, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): + + super().__init__(world=world, render_mode=render_mode, render_options=render_options) def get_reward(self, robot_pos_init, robot_pos_final): @@ -25,9 +32,9 @@ def get_reward(self, robot_pos_init, robot_pos_final): reward = (robot_com_pos_final[0] - robot_com_pos_init[0]) return reward - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation robot_ort = self.object_orientation_at_time(self.get_time(), "robot") @@ -38,27 +45,33 @@ def reset(self): self.get_floor_obs("robot", ["ground"], self.sight_dist), )) - return obs + return obs, {} class StepsUp(StairsBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'UpStepper-v0.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - StairsBase.__init__(self, self.world) + StairsBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) def step(self, action): @@ -97,27 +110,33 @@ def step(self, action): done = True reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class StepsDown(StairsBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'DownStepper-v0.json')) self.world.add_from_array('robot', body, 1, 
11, connections=connections) # init sim - StairsBase.__init__(self, self.world) + StairsBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) def step(self, action): @@ -156,27 +175,33 @@ def step(self, action): done = True reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class WalkingBumpy(StairsBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'ObstacleTraverser-v0.json')) self.world.add_from_array('robot', body, 2, 1, connections=connections) # init sim - StairsBase.__init__(self, self.world) + StairsBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) def step(self, action): @@ -216,27 +241,33 @@ def step(self, action): done = True reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class WalkingBumpy2(StairsBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'ObstacleTraverser-v1.json')) self.world.add_from_array('robot', body, 2, 4, connections=connections) # init sim - StairsBase.__init__(self, self.world) + StairsBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, 
high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) def step(self, action): @@ -273,27 +304,33 @@ def step(self, action): done = True reward += 2.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class VerticalBarrier(StairsBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Hurdler-v0.json')) self.world.add_from_array('robot', body, 2, 4, connections=connections) # init sim - StairsBase.__init__(self, self.world) + StairsBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) def step(self, action): @@ -328,27 +365,33 @@ def step(self, action): done = True reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class FloatingPlatform(StairsBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'PlatformJumper-v0.json')) self.world.add_from_array('robot', body, 1, 6, connections=connections) # init sim - StairsBase.__init__(self, self.world) + StairsBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) # terrain 
self.terrain_list = ["platform_1", "platform_2", "platform_3", "platform_4", "platform_5", "platform_6", "platform_7"] @@ -392,12 +435,12 @@ def step(self, action): reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - EvoGymBase.reset(self) + EvoGymBase.reset(self, seed=seed, options=options) # observation robot_ort = self.object_orientation_at_time(self.get_time(), "robot") @@ -408,26 +451,32 @@ def reset(self): self.get_floor_obs("robot", self.terrain_list, self.sight_dist), )) - return obs + return obs, {} class Gaps(StairsBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'GapJumper-v0.json')) self.world.add_from_array('robot', body, 1, 6, connections=connections) # init sim - StairsBase.__init__(self, self.world) + StairsBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) # terrain self.terrain_list = ["platform_1", "platform_2", "platform_3", "platform_4", "platform_5"] @@ -466,12 +515,12 @@ def step(self, action): reward -= 3.0 done = True - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - EvoGymBase.reset(self) + EvoGymBase.reset(self, seed=seed, options=options) # observation robot_ort = self.object_orientation_at_time(self.get_time(), "robot") @@ -482,11 +531,17 @@ def reset(self): self.get_floor_obs("robot", self.terrain_list, self.sight_dist), )) - return obs + return obs, {} class BlockSoup(StairsBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Traverser-v0.json')) @@ -526,15 +581,15 @@ def __init__(self, body, connections=None): count += 1 # init sim - StairsBase.__init__(self, self.world) + StairsBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and 
observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) def step(self, action): @@ -570,12 +625,12 @@ def step(self, action): done = True reward += 2.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation robot_ort = self.object_orientation_at_time(self.get_time(), "robot") @@ -586,4 +641,4 @@ def reset(self): self.get_floor_obs("robot", self.terrain_list, self.sight_dist), )) - return obs \ No newline at end of file + return obs, {} \ No newline at end of file diff --git a/evogym/envs/walk.py b/evogym/envs/walk.py index d02a4f68..c753529b 100644 --- a/evogym/envs/walk.py +++ b/evogym/envs/walk.py @@ -1,7 +1,8 @@ -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding +from typing import Optional, Dict, Any from evogym import * from evogym.envs import BenchmarkBase @@ -13,21 +14,27 @@ class WalkingFlat(BenchmarkBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Walker-v0.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - BenchmarkBase.__init__(self, self.world) + BenchmarkBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points,), dtype=float) def step(self, action): @@ -61,12 +68,12 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> 
Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( @@ -74,25 +81,31 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} class SoftBridge(BenchmarkBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'BridgeWalker-v0.json')) self.world.add_from_array('robot', body, 2, 5, connections=connections) # init sim - BenchmarkBase.__init__(self, self.world) + BenchmarkBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + 1 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + 1 + num_robot_points,), dtype=float) def step(self, action): @@ -127,12 +140,12 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( @@ -141,26 +154,32 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} class Duck(BenchmarkBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'CaveCrawler-v0.json')) self.world.add_from_array('robot', body, 1, 2, connections=connections) # init sim - BenchmarkBase.__init__(self, self.world) + BenchmarkBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points + 2*(self.sight_dist*2 +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points + 2*(self.sight_dist*2 +1),), dtype=float) def step(self, action): @@ -196,12 +215,12 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, 
reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( @@ -211,4 +230,4 @@ def reset(self): self.get_ceil_obs("robot", ["terrain"], self.sight_dist), )) - return obs + return obs, {} diff --git a/evogym/simulator/SimulatorCPP/Environment.h b/evogym/simulator/SimulatorCPP/Environment.h index b1b6302d..8ab01ae6 100644 --- a/evogym/simulator/SimulatorCPP/Environment.h +++ b/evogym/simulator/SimulatorCPP/Environment.h @@ -17,12 +17,12 @@ using namespace std; using namespace Eigen; -#ifdef __APPLE__ +// #ifdef __APPLE__ +// using RefMatrixXd = Matrix ; +// #else +// using RefMatrixXd = Ref ; +// #endif using RefMatrixXd = Matrix ; -#else -using RefMatrixXd = Ref ; -#endif - class Environment { private: diff --git a/evogym/simulator/SimulatorCPP/Interface.cpp b/evogym/simulator/SimulatorCPP/Interface.cpp index cbf0bd0d..ee7bbcb9 100644 --- a/evogym/simulator/SimulatorCPP/Interface.cpp +++ b/evogym/simulator/SimulatorCPP/Interface.cpp @@ -582,6 +582,11 @@ void Interface::hide_debug_window() { debug_window_showing = false; } +void Interface::close() { + glfwDestroyWindow(debug_window); + glfwTerminate(); +} + vector Interface::get_debug_window_pos() { int xpos, ypos; diff --git a/evogym/simulator/SimulatorCPP/Interface.h b/evogym/simulator/SimulatorCPP/Interface.h index fa79d631..37a47519 100644 --- a/evogym/simulator/SimulatorCPP/Interface.h +++ b/evogym/simulator/SimulatorCPP/Interface.h @@ -68,6 +68,7 @@ class Interface void show_debug_window(); void hide_debug_window(); + void close(); vector get_debug_window_pos(); GLFWwindow* get_debug_window_ref(); diff --git a/evogym/simulator/SimulatorCPP/PythonBindings.cpp b/evogym/simulator/SimulatorCPP/PythonBindings.cpp index 27fcddaa..80b84145 100644 --- a/evogym/simulator/SimulatorCPP/PythonBindings.cpp +++ b/evogym/simulator/SimulatorCPP/PythonBindings.cpp @@ -21,6 +21,7 @@ PYBIND11_MODULE(simulator_cpp, m) { .def("render", &Interface::render, py::arg("camera"), py::arg("hide_background") = false, py::arg("hide_grid") = false, py::arg("hide_edges") = false, py::arg("hide_boxels") = false, py::arg("dont_clear") = false) .def("show_debug_window", &Interface::show_debug_window) .def("hide_debug_window", &Interface::hide_debug_window) + .def("close", &Interface::close) .def("get_debug_window_pos", &Interface::get_debug_window_pos, py::return_value_policy::copy); py::class_(m, "Sim") diff --git a/evogym/utils.py b/evogym/utils.py index f046a265..8a5b040d 100644 --- a/evogym/utils.py +++ b/evogym/utils.py @@ -57,11 +57,12 @@ def get_uniform(x: int) -> np.ndarray: Return a uniform distribution of a given size. Args: - x (int): size of distribution. + x (int): size of distribution. Must be positive. Returns: np.ndarray: array representing the probability distribution. """ + assert x > 0, f"Invalid size {x} for uniform distribution. Must be positive." return np.ones((x)) / x def draw(pd: np.ndarray) -> int: @@ -69,14 +70,19 @@ def draw(pd: np.ndarray) -> int: Sample from a probability distribution. Args: - pd (np.ndarray): array representing the probability of sampling each element. + pd (np.ndarray): array representing the relative probability of sampling each element. Entries must be non-negative and sum to a non-zero value. 
Must contain at least one element. Returns: int: sampled index. """ pd_copy = pd.copy() - if (type(pd_copy) != type(np.array([]))): + if not isinstance(pd_copy, np.ndarray): pd_copy = np.array(pd_copy) + + assert pd_copy.size > 0, f"Invalid size {pd_copy.size} for probability distribution. Must contain at least one element." + assert np.all(pd_copy >= 0), f"Invalid probability distribution {pd_copy}. Entries must be non-negative." + assert np.sum(pd_copy) > 0, f"Invalid probability distribution {pd_copy}. Entries must sum to a non-zero value." + pd_copy = pd_copy / pd_copy.sum() rand = random.uniform(0, 1) @@ -88,17 +94,31 @@ def draw(pd: np.ndarray) -> int: def sample_robot( robot_shape: Tuple[int, int], - pd: np.ndarray = None) -> Tuple[np.ndarray, np.ndarray]: + pd: Optional[np.ndarray] = None +) -> Tuple[np.ndarray, np.ndarray]: """ Return a randomly sampled robot of a particular size. Args: robot_shape (Tuple(int, int)): robot shape to sample `(h, w)`. - pd (np.ndarray): `(5,)` array representing the probability of sampling each robot voxel (empty, rigid, soft, h_act, v_act). Defaults to a custom distribution. (default = None) + pd (np.ndarray): `(5,)` array representing the relative probability of sampling each robot voxel (empty, rigid, soft, h_act, v_act). Defaults to a custom distribution. (default = None) Returns: Tuple[np.ndarray, np.ndarray]: randomly sampled (valid) robot voxel array and its associated connections array. + + Throws: + If it is not possible to sample a connected robot with at least one actuator. """ + + h_act, v_act, empty = VOXEL_TYPES['H_ACT'], VOXEL_TYPES['V_ACT'], VOXEL_TYPES['EMPTY'] + + if pd is not None: + assert pd.shape == (5,), f"Invalid probability distribution {pd}. Must have shape (5,)." + if pd[h_act] + pd[v_act] == 0: + raise ValueError(f"Invalid probability distribution {pd}. Must have a non-zero probability of sampling an actuator.") + if sum(pd) - pd[empty] == 0: + raise ValueError(f"Invalid probability distribution {pd}. Must have a non-zero probability of sampling a non-empty voxel.") + done = False while (not done): @@ -219,7 +239,7 @@ def has_actuator(robot: np.ndarray) -> bool: def get_full_connectivity(robot: np.ndarray) -> np.ndarray: """ - Returns a connections array given a connected robot structure. Assumes all adjacent voxels are connected. + Returns a connections array given a structure. Assumes all adjacent voxels are connected. Args: robot (np.ndarray): array specifing the voxel structure of the robot. diff --git a/evogym/viewer.py b/evogym/viewer.py index 20cb787a..5e7b18c4 100644 --- a/evogym/viewer.py +++ b/evogym/viewer.py @@ -134,6 +134,13 @@ def hide_debug_window(self,) -> None: if self._has_init_viewer: self._viewer.hide_debug_window() + def close(self,) -> None: + """ + Close the viewer. + """ + if self._has_init_viewer: + self._viewer.close() + def track_objects(self, *objects: Tuple[str]) -> None: """ Set objects for the viewer to automatically track. The viewer tracks objects by adjusting its `pos` and `view_size` automatically every time before rendering. 
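For reference, here is a minimal sketch of the stricter input validation added to `evogym/utils.py` in the hunks above. It is illustrative only (not part of the diff): it assumes the voxel-type ordering `(empty, rigid, soft, h_act, v_act)` stated in the `sample_robot` docstring, and it imports the helpers directly from `evogym.utils` as they are defined in that file.

```python
import numpy as np

from evogym import sample_robot
from evogym.utils import draw, get_uniform

# get_uniform(5) returns five equal relative weights; draw() normalizes
# the weights and returns a sampled index, asserting that every entry is
# non-negative and that the entries do not sum to zero.
pd = get_uniform(5)
idx = draw(pd)

# sample_robot() now rejects degenerate distributions up front instead of
# looping forever: this distribution (assumed order: empty, rigid, soft,
# h_act, v_act) gives zero weight to both actuator types, so it raises.
bad_pd = np.array([0.6, 0.2, 0.2, 0.0, 0.0])
try:
    body, connections = sample_robot((5, 5), pd=bad_pd)
except ValueError as err:
    print(err)

# An all-zero distribution now fails draw()'s assertions as well.
try:
    draw(np.zeros(3))
except AssertionError as err:
    print(err)
```

Under these assumptions, a misconfigured distribution is reported immediately, rather than surfacing as NaNs from normalizing a zero-sum array in `draw` or as an endless rejection loop in `sample_robot`.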
@@ -187,18 +194,20 @@ def set_tracking_settings(self, **settings) -> None: parsed_arg = arg.split('_')[1] self._tracking_lock[arg] = parsed_arg - def render(self, - mode: str ='screen', - verbose: bool = False, - hide_background: bool = False, - hide_grid: bool = False, - hide_edges: bool = False, - hide_voxels: bool = False) -> Optional[np.ndarray]: + def render( + self, + mode: str = 'screen', + verbose: bool = False, + hide_background: bool = False, + hide_grid: bool = False, + hide_edges: bool = False, + hide_voxels: bool = False + ) -> Optional[np.ndarray]: """ Render the simulation. Args: - mode (str): values of 'screen' and 'human' will render to a debug window. If set to 'img' will return an image array. + mode (str): values of `screen` and `human` will render to a debug window. If set to `img` or `rgb_array` will return an image array. verbose (bool): whether or not to print the rendering speed (rps) every second. hide_background (bool): whether or not to render the cream-colored background. If shut off background will be white. hide_grid (bool): whether or not to render the grid. @@ -206,10 +215,10 @@ def render(self, hide_voxels (bool): whether or not to render voxels. Returns: - Optional[np.ndarray]: if `mode` is set to `img`, will return an image array. + Optional[np.ndarray]: if `mode` is set to `img` or `rgb_array`, will return an image array. """ - accepted_modes = ['screen', 'human', 'img'] + accepted_modes = ['screen', 'human', 'img', 'rgb_array'] if not mode in accepted_modes: raise ValueError( f'mode {mode} is not a valid mode. The valid modes are {accepted_modes}' @@ -236,7 +245,7 @@ def render(self, self._has_init_screen_camera = True self._viewer.render(self.screen_camera, *render_settings) - if mode == 'img': + if mode == 'img' or mode == 'rgb_array': if not self._has_init_img_camera: self._init_img_camera() self._has_init_img_camera = True diff --git a/examples/README.md b/examples/README.md index d2f32238..b704a0c7 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,93 +1,45 @@ # Examples -This readme describes how to run several control optimization and co-design experiments and visualize the results. +This readme describes how to run several control optimization and co-design experiments and visualize the results. All scripts should be run from within the `examples` directory. Ensure that you have installed requirements: `pip install -r requirements.txt` and cloned the repo with submodules before running any of these scripts. -## PPO (control optimization) - -To set the parameters of group ppo, you can edit `run_group_ppo.py` and change the following: - -* `experiment_name` = all experiment files are saved to `saved_data/experiment_name` - -Create `SimJob`s to specify which robots to train and in which environments to train them. `SimJob`s are parameterized by the following: - -* `name` = all job files are saved to `saved_data/experiment_name/name` -* `robots` = array of robot names specifying which robots to train. Robot files must be of type `.json` (created using the EvoGym Design Tool) or `.npz` (saved from another experiment) and must be located in `examples/world_data` -* `envs` = array of environment names in which to train robots -* `train_iters` = number of iterations of ppo to train each robot's controller - -Each robot in `robots` will be trained in each environment in `envs` for `train_iters` iterations of ppo. 
- -From within `example`, you can run group ppo with the following command: - -```shell -python run_group_ppo.py --algo ppo --use-gae --lr 2.5e-4 --clip-param 0.1 --value-loss-coef 0.5 --num-processes 4 --num-steps 128 --num-mini-batch 4 --log-interval 100 --use-linear-lr-decay --entropy-coef 0.01 --eval-interval 50 -``` - -All ppo hyperparameters are specified through command line arguments. For more details please see [this repo](https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail). - - -## Genetic Algorithm (co-design) +> [!WARNING] +> Many of these scripts have been modified in favor of improved usability. If you wish to exactly recreate the results from the original EvoGym paper, please see the [original release](https://github.com/EvolutionGym/evogym/releases/tag/1.0.0). -To set the parameters of the genetic algorithm, you can edit `run_ga.py` and change the following: - -* `seed` = seed to control randomness -* `pop_size` = the algorithm evolves robots in populations of this size -* `structure_shape` = each robot is represented by `(m,n)` matrix of voxels -* `experiment_name` = all experiment files are saved to `saved_data/experiment_name` -* `max_evaluations` = maximum number of unique robots to evaluate -* `train_iters` = number of iterations of ppo to train each robot's controller -* `num_cores` = number of robots to train in parallel. Note: the total number of processes created will be `num_cores * num_processes` (as specified below in the command line) - -From within `example`, you can run the genetic algorithm with the following command: - -```shell -python run_ga.py --env-name "Walker-v0" --algo ppo --use-gae --lr 2.5e-4 --clip-param 0.1 --value-loss-coef 0.5 --num-processes 4 --num-steps 128 --num-mini-batch 4 --log-interval 100 --use-linear-lr-decay --entropy-coef 0.01 --eval-interval 50 -``` - -The environment name as well as all ppo hyperparameters are specified through command line arguments. For more details please see [this repo](https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail). - - -## Bayesian Optimization (co-design) - -To set the parameters of bayesian optimization, you can edit `run_bo.py` and change the following: - -* `seed` = seed to control randomness -* `pop_size` = the algorithm evolves robots in populations of this size -* `structure_shape` = each robot is represented by `(m,n)` matrix of voxels -* `experiment_name` = all experiment files are saved to `saved_data/experiment_name` -* `max_evaluations` = maximum number of unique robots to evaluate. Should be a multiple of `pop_size` -* `train_iters` = number of iterations of ppo to train each robot's controller -* `num_cores` = number of robots to train in parallel. Note: the total number of processes created will be `num_cores * num_processes` (as specified below in the command line) +## PPO (control optimization) -From within `example`, you can run bayesian optimization with the following command: +The script is set up to train a robot's policy in the `Walker-v0` environment: ```shell -python run_bo.py --env-name "Walker-v0" --algo ppo --use-gae --lr 2.5e-4 --clip-param 0.1 --value-loss-coef 0.5 --num-processes 4 --num-steps 128 --num-mini-batch 4 --log-interval 100 --use-linear-lr-decay --entropy-coef 0.01 --eval-interval 50 +python run_ppo.py --n-envs 4 --n-eval-envs 4 --n-evals 4 --eval-interval 10000 --total-timesteps 100000 ``` -The environment name as well as all ppo hyperparameters are specified through command line arguments. 
For more details please see [this repo](https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail). +Results saved to `saved_data/test_ppo`. See `python run_ppo.py --help` to see how training can be customized. We recommend setting `total-timesteps > 1e6` for a serious training run. +## Co-Design -## CPPN-NEAT (co-design) +All three co-design algorithms: the genetic algorithm, bayesian optimization, and CPPN-NEAT have the same core parameters: -To set the parameters of cppn-neat, you can edit `run_cppn_neat.py` and change the following: +* `exp-name` = all experiment files are saved to `saved_data/exp_name` +* `env-name` = environment on which to run co-design +* `pop-size` = the algorithm evolves robots in populations of this size +* `structure-shape` = each robot is represented by `(m,n)` matrix of voxels +* `max-evaluations` = maximum number of unique robots to evaluate +* `num-cores` = number of robots to train in parallel. Note: the total number of processes created will be `num-cores * n-envs` (as specified below in the command line) -* `seed` = seed to control randomness -* `pop_size` = the algorithm evolves robots in populations of this size -* `structure_shape` = each robot is represented by `(m,n)` matrix of voxels -* `experiment_name` = all experiment files are saved to `saved_data/experiment_name` -* `max_evaluations` = maximum number of unique robots to evaluate. Should be a multiple of `pop_size` -* `train_iters` = number of iterations of ppo to train each robot's controller -* `num_cores` = number of robots to train in parallel. Note: the total number of processes created will be `num_cores * num_processes` (as specified below in the command line) +See all options via +`python run_ga.py --help` +`python run_bo.py --help` +`python run_cppn_neat.py --help` -From within `example`, you can run cppn-neat with the following command: +From within `example`, you can run the co-design algorithms as follows: ```shell -python run_cppn_neat.py --env-name "Walker-v0" --algo ppo --use-gae --lr 2.5e-4 --clip-param 0.1 --value-loss-coef 0.5 --num-processes 4 --num-steps 128 --num-mini-batch 4 --log-interval 100 --use-linear-lr-decay --entropy-coef 0.01 --eval-interval 50 +python run_ga.py --eval-interval 10000 --total-timesteps 100000 +python run_bo.py --eval-interval 10000 --total-timesteps 100000 +python run_cppn_neat.py --eval-interval 10000 --total-timesteps 100000 ``` -The environment name as well as all ppo hyperparameters are specified through command line arguments. For more details please see [this repo](https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail). - +Note that the default parameters are set for testing purposes, and will not produce task-performant robots. Feel free to increase the co-design/PPO parameters based on your compute availability. You may also reference evaluation parameters from [Appendix D. of our paper](https://arxiv.org/pdf/2201.09863). ## Visualize @@ -99,14 +51,11 @@ python visualize.py --env-name "Walker-v0" Use the appropriate environment name and follow the on-screen instructions. -To visualize the results of a group ppo experiment you can use the same command -- the environment name is no longer necessary. - ## Make Gifs -To set the parameters of the gif generating script, you can edit `make_gifs.py` and change the following: +This script generates gifs from co-design experiments. 
To set the parameters of the gif generating script, you can edit `make_gifs.py` and change the following: * `GIF_RESOLUTION` = resolution of produced gifs -* `NUM_PROC` = number of gifs to produce in parallel * `name` = all files are saved to `saved_data/all_media/name` * `experiment_names`, `env_names` = arrays of experiments their corresponding environments to generate gifs for * `load_dir` = directory where experiments are stored diff --git a/examples/bo/run.py b/examples/bo/run.py index 7cc0707a..9cd5e6f4 100644 --- a/examples/bo/run.py +++ b/examples/bo/run.py @@ -1,9 +1,8 @@ -from distutils.command.config import config import os from re import X import shutil -import random import numpy as np +import argparse from GPyOpt.core.task.space import Design_space from GPyOpt.models import GPModel @@ -13,17 +12,9 @@ from GPyOpt.core.evaluators import ThompsonBatch from .optimizer import Objective, Optimization -import sys -curr_dir = os.path.dirname(os.path.abspath(__file__)) -root_dir = os.path.join(curr_dir, '..') -external_dir = os.path.join(root_dir, 'externals') -sys.path.insert(0, root_dir) -sys.path.insert(1, os.path.join(external_dir, 'pytorch_a2c_ppo_acktr_gail')) - +from ppo.run import run_ppo import evogym.envs from evogym import is_connected, has_actuator, get_full_connectivity -from utils.algo_utils import TerminationCondition -from ppo import run_ppo def get_robot_from_genome(genome, config): ''' @@ -36,6 +27,8 @@ def get_robot_from_genome(genome, config): def eval_genome_cost(genome, config, genome_id, generation): robot = get_robot_from_genome(genome, config) + args, env_name = config['args'], config['env_name'] + if not (is_connected(robot) and has_actuator(robot)): return 10 else: @@ -45,9 +38,7 @@ def eval_genome_cost(genome, config, genome_id, generation): save_path_controller = os.path.join(save_path_generation, 'controller') np.savez(save_path_structure, robot, connectivity) fitness = run_ppo( - structure=(robot, connectivity), - termination_condition=TerminationCondition(config['train_iters']), - saving_convention=(save_path_controller, genome_id), + args, robot, env_name, save_path_controller, f'{genome_id}', connectivity ) cost = -fitness return cost @@ -61,20 +52,23 @@ def eval_genome_constraint(genomes, config): return np.array(all_violation) def run_bo( - experiment_name, - structure_shape, - pop_size, - max_evaluations, - train_iters, - num_cores, - ): - - save_path = os.path.join(root_dir, 'saved_data', experiment_name) + args: argparse.Namespace, +): + exp_name, env_name, pop_size, structure_shape, max_evaluations, num_cores = ( + args.exp_name, + args.env_name, + args.pop_size, + args.structure_shape, + args.max_evaluations, + args.num_cores, + ) + + save_path = os.path.join('saved_data', exp_name) try: os.makedirs(save_path) except: - print(f'THIS EXPERIMENT ({experiment_name}) ALREADY EXISTS') + print(f'THIS EXPERIMENT ({exp_name}) ALREADY EXISTS') print('Override? 
(y/n): ', end='') ans = input() if ans.lower() == 'y': @@ -88,13 +82,13 @@ def run_bo( with open(save_path_metadata, 'w') as f: f.write(f'POP_SIZE: {pop_size}\n' \ f'STRUCTURE_SHAPE: {structure_shape[0]} {structure_shape[1]}\n' \ - f'MAX_EVALUATIONS: {max_evaluations}\n' \ - f'TRAIN_ITERS: {train_iters}\n') + f'MAX_EVALUATIONS: {max_evaluations}\n') config = { 'structure_shape': structure_shape, - 'train_iters': train_iters, 'save_path': save_path, + 'args': args, # args for run_ppo + 'env_name': env_name, } def constraint_func(genome): diff --git a/examples/cppn_neat/run.py b/examples/cppn_neat/run.py index 91c78f45..d0649793 100644 --- a/examples/cppn_neat/run.py +++ b/examples/cppn_neat/run.py @@ -1,9 +1,9 @@ import os import shutil -import random import numpy as np import torch import neat +import argparse import sys curr_dir = os.path.dirname(os.path.abspath(__file__)) @@ -11,16 +11,14 @@ external_dir = os.path.join(root_dir, 'externals') sys.path.insert(0, root_dir) sys.path.insert(1, os.path.join(external_dir, 'PyTorch-NEAT')) -sys.path.insert(1, os.path.join(external_dir, 'pytorch_a2c_ppo_acktr_gail')) from pytorch_neat.cppn import create_cppn from .parallel import ParallelEvaluator from .population import Population -from utils.algo_utils import TerminationCondition -from ppo import run_ppo -from evogym import is_connected, has_actuator, get_full_connectivity, hashable +from ppo.run import run_ppo import evogym.envs +from evogym import is_connected, has_actuator, get_full_connectivity, hashable def get_cppn_input(structure_shape): @@ -43,15 +41,16 @@ def get_robot_from_genome(genome, config): def eval_genome_fitness(genome, config, genome_id, generation): robot = get_robot_from_genome(genome, config) + args, env_name = config.extra_info['args'], config.extra_info['env_name'] + connectivity = get_full_connectivity(robot) save_path_generation = os.path.join(config.extra_info['save_path'], f'generation_{generation}') save_path_structure = os.path.join(save_path_generation, 'structure', f'{genome_id}') save_path_controller = os.path.join(save_path_generation, 'controller') np.savez(save_path_structure, robot, connectivity) + fitness = run_ppo( - structure=(robot, connectivity), - termination_condition=TerminationCondition(config.extra_info['train_iters']), - saving_convention=(save_path_controller, genome_id), + args, robot, env_name, save_path_controller, f'{genome_id}', connectivity ) return fitness @@ -93,20 +92,23 @@ def post_evaluate(self, config, population, species, best_genome): f.write(out) def run_cppn_neat( - experiment_name, - structure_shape, - pop_size, - max_evaluations, - train_iters, - num_cores, - ): + args: argparse.Namespace +): + exp_name, env_name, pop_size, structure_shape, max_evaluations, num_cores = ( + args.exp_name, + args.env_name, + args.pop_size, + args.structure_shape, + args.max_evaluations, + args.num_cores, + ) - save_path = os.path.join(root_dir, 'saved_data', experiment_name) + save_path = os.path.join('saved_data', exp_name) try: os.makedirs(save_path) except: - print(f'THIS EXPERIMENT ({experiment_name}) ALREADY EXISTS') + print(f'THIS EXPERIMENT ({exp_name}) ALREADY EXISTS') print('Override? 
(y/n): ', end='') ans = input() if ans.lower() == 'y': @@ -120,8 +122,7 @@ def run_cppn_neat( with open(save_path_metadata, 'w') as f: f.write(f'POP_SIZE: {pop_size}\n' \ f'STRUCTURE_SHAPE: {structure_shape[0]} {structure_shape[1]}\n' \ - f'MAX_EVALUATIONS: {max_evaluations}\n' \ - f'TRAIN_ITERS: {train_iters}\n') + f'MAX_EVALUATIONS: {max_evaluations}\n') structure_hashes = {} @@ -134,9 +135,10 @@ def run_cppn_neat( config_path, extra_info={ 'structure_shape': structure_shape, - 'train_iters': train_iters, 'save_path': save_path, 'structure_hashes': structure_hashes, + 'args': args, # args for run_ppo + 'env_name': env_name, }, custom_config=[ ('NEAT', 'pop_size', pop_size), diff --git a/examples/ga/run.py b/examples/ga/run.py index fdb98201..5d033f4b 100644 --- a/examples/ga/run.py +++ b/examples/ga/run.py @@ -3,34 +3,40 @@ import shutil import random import math +import argparse +from typing import List -import sys -curr_dir = os.path.dirname(os.path.abspath(__file__)) -root_dir = os.path.join(curr_dir, '..') -external_dir = os.path.join(root_dir, 'externals') -sys.path.insert(0, root_dir) -sys.path.insert(1, os.path.join(external_dir, 'pytorch_a2c_ppo_acktr_gail')) - -from ppo import run_ppo +from ppo.run import run_ppo +import evogym.envs from evogym import sample_robot, hashable import utils.mp_group as mp -from utils.algo_utils import get_percent_survival_evals, mutate, TerminationCondition, Structure +from utils.algo_utils import get_percent_survival_evals, mutate, Structure -def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_iters, num_cores): +def run_ga( + args: argparse.Namespace, +): print() - - ### STARTUP: MANAGE DIRECTORIES ### - home_path = os.path.join(root_dir, "saved_data", experiment_name) + + exp_name, env_name, pop_size, structure_shape, max_evaluations, num_cores = ( + args.exp_name, + args.env_name, + args.pop_size, + args.structure_shape, + args.max_evaluations, + args.num_cores, + ) + + ### MANAGE DIRECTORIES ### + home_path = os.path.join("saved_data", exp_name) start_gen = 0 - ### DEFINE TERMINATION CONDITION ### - tc = TerminationCondition(train_iters) + ### DEFINE TERMINATION CONDITION ### is_continuing = False try: os.makedirs(home_path) except: - print(f'THIS EXPERIMENT ({experiment_name}) ALREADY EXISTS') + print(f'THIS EXPERIMENT ({exp_name}) ALREADY EXISTS') print("Override? 
(y/n/c): ", end="") ans = input() if ans.lower() == "y": @@ -46,10 +52,10 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it ### STORE META-DATA ## if not is_continuing: - temp_path = os.path.join(root_dir, "saved_data", experiment_name, "metadata.txt") + temp_path = os.path.join("saved_data", exp_name, "metadata.txt") try: - os.makedirs(os.path.join(root_dir, "saved_data", experiment_name)) + os.makedirs(os.path.join("saved_data", exp_name)) except: pass @@ -57,11 +63,10 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it f.write(f'POP_SIZE: {pop_size}\n') f.write(f'STRUCTURE_SHAPE: {structure_shape[0]} {structure_shape[1]}\n') f.write(f'MAX_EVALUATIONS: {max_evaluations}\n') - f.write(f'TRAIN_ITERS: {train_iters}\n') f.close() else: - temp_path = os.path.join(root_dir, "saved_data", experiment_name, "metadata.txt") + temp_path = os.path.join("saved_data", exp_name, "metadata.txt") f = open(temp_path, "r") count = 0 for line in f: @@ -71,18 +76,15 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it structure_shape = (int(line.split()[1]), int(line.split()[2])) if count == 2: max_evaluations = int(line.split()[1]) - if count == 3: - train_iters = int(line.split()[1]) - tc.change_target(train_iters) count += 1 print(f'Starting training with pop_size {pop_size}, shape ({structure_shape[0]}, {structure_shape[1]}), ' + - f'max evals: {max_evaluations}, train iters {train_iters}.') + f'max evals: {max_evaluations}.') f.close() ### GENERATE // GET INITIAL POPULATION ### - structures = [] + structures: List[Structure] = [] population_structure_hashes = {} num_evaluations = 0 generation = 0 @@ -103,7 +105,7 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it else: for g in range(start_gen+1): for i in range(pop_size): - save_path_structure = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(g), "structure", str(i) + ".npz") + save_path_structure = os.path.join("saved_data", exp_name, "generation_" + str(g), "structure", str(i) + ".npz") np_data = np.load(save_path_structure) structure_data = [] for key, value in np_data.items(): @@ -125,8 +127,8 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it ### MAKE GENERATION DIRECTORIES ### - save_path_structure = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(generation), "structure") - save_path_controller = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(generation), "controller") + save_path_structure = os.path.join("saved_data", exp_name, "generation_" + str(generation), "structure") + save_path_controller = os.path.join("saved_data", exp_name, "generation_" + str(generation), "controller") try: os.makedirs(save_path_structure) @@ -150,19 +152,20 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it for structure in structures: if structure.is_survivor: - save_path_controller_part = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(generation), "controller", - "robot_" + str(structure.label) + "_controller" + ".pt") - save_path_controller_part_old = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(generation-1), "controller", - "robot_" + str(structure.prev_gen_label) + "_controller" + ".pt") + save_path_controller_part = os.path.join("saved_data", exp_name, "generation_" + str(generation), "controller", + f"{structure.label}.zip") + 
save_path_controller_part_old = os.path.join("saved_data", exp_name, "generation_" + str(generation-1), "controller", + f"{structure.prev_gen_label}.zip") print(f'Skipping training for {save_path_controller_part}.\n') try: shutil.copy(save_path_controller_part_old, save_path_controller_part) except: print(f'Error coppying controller for {save_path_controller_part}.\n') - else: - ppo_args = ((structure.body, structure.connections), tc, (save_path_controller, structure.label)) + else: + ppo_args = (args, structure.body, env_name, save_path_controller, f'{structure.label}', structure.connections) group.add_job(run_ppo, ppo_args, callback=structure.set_reward) + group.run_jobs(num_cores) @@ -177,7 +180,7 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it structures = sorted(structures, key=lambda structure: structure.fitness, reverse=True) #SAVE RANKING TO FILE - temp_path = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(generation), "output.txt") + temp_path = os.path.join("saved_data", exp_name, "generation_" + str(generation), "output.txt") f = open(temp_path, "w") out = "" diff --git a/examples/gym_test.py b/examples/gym_test.py index 2f39a3d0..d41d29bd 100644 --- a/examples/gym_test.py +++ b/examples/gym_test.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import evogym.envs from evogym import sample_robot @@ -6,15 +6,14 @@ if __name__ == '__main__': body, connections = sample_robot((5,5)) - env = gym.make('Walker-v0', body=body) + env = gym.make('Walker-v0', body=body, render_mode='human') env.reset() while True: - action = env.action_space.sample()-1 - ob, reward, done, info = env.step(action) - env.render() + action = env.action_space.sample() + ob, reward, terminated, truncated, info = env.step(action) - if done: + if terminated or truncated: env.reset() env.close() diff --git a/examples/make_gifs.py b/examples/make_gifs.py index 2a2a8efb..b79c22d8 100644 --- a/examples/make_gifs.py +++ b/examples/make_gifs.py @@ -1,20 +1,14 @@ - +import os import numpy as np import torch -import gym from PIL import Image import imageio from pygifsicle import optimize -import os, sys -root_dir = os.path.dirname(os.path.abspath(__file__)) -external_dir = os.path.join(root_dir, 'externals') -sys.path.insert(0, root_dir) -sys.path.insert(1, os.path.join(external_dir, 'pytorch_a2c_ppo_acktr_gail')) - +import evogym.envs +from stable_baselines3 import PPO +from stable_baselines3.common.env_util import make_vec_env from utils.algo_utils import * -from ppo.envs import make_vec_envs -from ppo.utils import get_vec_normalize import utils.mp_group as mp def get_generations(load_dir, exp_name): @@ -37,10 +31,7 @@ def get_exp_gen_data(exp_name, load_dir, gen): robot_data.append((int(line.split()[0]), float(line.split()[1]))) return robot_data -def dummy_callback(_): - pass - -def save_robot_gif(out_path, env_name, body_path, ctrl_path): +def save_robot_gif(out_path, env_name, body_path, ctrl_path, seed=42): global GIF_RESOLUTION structure_data = np.load(body_path) @@ -48,43 +39,23 @@ def save_robot_gif(out_path, env_name, body_path, ctrl_path): for key, value in structure_data.items(): structure.append(value) structure = tuple(structure) - - env = make_vec_envs(env_name, structure, 1000, 1, None, None, device='cpu', allow_early_resets=False) - env.get_attr("default_viewer", indices=None)[0].set_resolution(GIF_RESOLUTION) - - actor_critic, obs_rms = torch.load(ctrl_path, map_location='cpu') - - vec_norm = get_vec_normalize(env) - if vec_norm is not 
None: - vec_norm.eval() - vec_norm.obs_rms = obs_rms - - recurrent_hidden_states = torch.zeros(1, actor_critic.recurrent_hidden_state_size) - masks = torch.zeros(1, 1) - - obs = env.reset() - img = env.render(mode='img') - reward = None + + model = PPO.load(ctrl_path) + + # Parallel environments + vec_env = make_vec_env(env_name, n_envs=1, seed=seed, env_kwargs={ + 'body': structure[0], + 'connections': structure[1], + "render_mode": "img", + }) + + obs = vec_env.reset() + imgs = [vec_env.env_method('render')[0]] # vec env is stupid; .render() dosent work done = False - - imgs = [] - # arrays = [] while not done: - - with torch.no_grad(): - value, action, _, recurrent_hidden_states = actor_critic.act( - obs, recurrent_hidden_states, masks, deterministic=True) - - obs, reward, done, _ = env.step(action) - img = env.render(mode='img') - imgs.append(img) - - masks.fill_(0.0 if (done) else 1.0) - - if done == True: - env.reset() - - env.close() + action, _states = model.predict(obs, deterministic=True) + obs, reward, done, info = vec_env.step(action) + imgs.append(vec_env.env_method('render')[0]) imageio.mimsave(f'{out_path}.gif', imgs, duration=(1/50.0)) try: @@ -120,7 +91,6 @@ def __str__(self): out += f'{comp}_' return out[:-1] - class Job(): def __init__( self, @@ -212,7 +182,7 @@ def generate(self, load_dir, save_dir, depth=0): for idx, reward in get_exp_gen_data(exp_name, load_dir, gen): robots.append(Robot( body_path = os.path.join(load_dir, exp_name, f"generation_{gen}", "structure", f"{idx}.npz"), - ctrl_path = os.path.join(load_dir, exp_name, f"generation_{gen}", "controller", f"robot_{idx}_controller.pt"), + ctrl_path = os.path.join(load_dir, exp_name, f"generation_{gen}", "controller", f"{idx}.zip"), reward = reward, env_name = env_name, exp_name = exp_name if len(self.experiment_names) != 1 else None, @@ -231,25 +201,11 @@ def generate(self, load_dir, save_dir, depth=0): robot.body_path, robot.ctrl_path ) - - # multiprocessing is currently broken - - # group = mp.Group() - # for i, robot in zip(ranks, robots): - # gif_args = ( - # os.path.join(save_dir, f'{i}_{robot}'), - # robot.env_name, - # robot.body_path, - # robot.ctrl_path - # ) - # group.add_job(save_robot_gif, gif_args, callback=dummy_callback) - # group.run_jobs(NUM_PROC) GIF_RESOLUTION = (1280/5, 720/5) -# NUM_PROC = 8 if __name__ == '__main__': exp_root = os.path.join('saved_data') - save_dir = os.path.join(root_dir, 'saved_data', 'all_media') + save_dir = os.path.join('saved_data', 'all_media') my_job = Job( name = 'test_ga', diff --git a/examples/ppo/__init__.py b/examples/ppo/__init__.py deleted file mode 100644 index 9108e19a..00000000 --- a/examples/ppo/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from ppo import arguments -from ppo.run import run_ppo \ No newline at end of file diff --git a/examples/ppo/args.py b/examples/ppo/args.py new file mode 100644 index 00000000..b0b2f8a6 --- /dev/null +++ b/examples/ppo/args.py @@ -0,0 +1,60 @@ +import argparse + +def add_ppo_args(parser: argparse.ArgumentParser) -> None: + """ + Add PPO arguments to the parser + """ + + ppo_parser: argparse.ArgumentParser = parser.add_argument_group('ppo arguments') + + ppo_parser.add_argument( + '--verbose-ppo', default=1, type=int, help='Verbosity level for PPO: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for debug messages (default: 1)' + ) + ppo_parser.add_argument( + '--learning-rate', default=2.5e-4, type=float, help='Learning rate for PPO (default: 2.5e-4)' + ) + ppo_parser.add_argument( + '--n-steps', 
default=128, type=int, help='The number of steps to run for each environment per update for PPO (i.e. rollout buffer size is n_steps * n_envs where n_envs is number of environment copies running in parallel) (default: 128)' + ) + ppo_parser.add_argument( + '--batch-size', default=4, type=int, help='Mini-batch size for PPO (default: 4)' + ) + ppo_parser.add_argument( + '--n-epochs', default=4, type=int, help='Number of epochs when optimizing the surrogate objective for PPO (default: 4)' + ) + ppo_parser.add_argument( + '--gamma', default=0.99, type=float, help='Discount factor for PPO (default: 0.99)' + ) + ppo_parser.add_argument( + '--gae-lambda', default=0.95, type=float, help='Lambda parameter for Generalized Advantage Estimation for PPO (default: 0.95)' + ) + ppo_parser.add_argument( + '--vf-coef', default=0.5, type=float, help='Value function coefficient for PPO loss calculation (default: 0.5)' + ) + ppo_parser.add_argument( + '--max-grad-norm', default=0.5, type=float, help='The maximum value of the gradient clipping for PPO (default: 0.5)' + ) + ppo_parser.add_argument( + '--ent-coef', default=0.01, type=float, help='Entropy coefficient for PPO loss calculation (default: 0.01)' + ) + ppo_parser.add_argument( + '--clip-range', default=0.1, type=float, help='Clipping parameter for PPO (default: 0.1)' + ) + ppo_parser.add_argument( + '--total-timesteps', default=1e6, type=int, help='Total number of timesteps for PPO (default: 1e6)' + ) + ppo_parser.add_argument( + '--log-interval', default=50, type=int, help='Episodes before logging PPO (default: 50)' + ) + ppo_parser.add_argument( + '--n-envs', default=1, type=int, help='Number of parallel environments for PPO (default: 1)' + ) + ppo_parser.add_argument( + '--n-eval-envs', default=1, type=int, help='Number of parallel environments for PPO evaluation (default: 1)' + ) + ppo_parser.add_argument( + '--n-evals', default=1, type=int, help='Number of times to run the environment during each eval (default: 1)' + ) + ppo_parser.add_argument( + '--eval-interval', default=1e5, type=int, help='Number of steps before evaluating PPO model (default: 1e5)' + ) \ No newline at end of file diff --git a/examples/ppo/arguments.py b/examples/ppo/arguments.py deleted file mode 100644 index d99c6e6a..00000000 --- a/examples/ppo/arguments.py +++ /dev/null @@ -1,167 +0,0 @@ -import argparse -import torch - -# Derived from -# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail - -def get_args(): - parser = argparse.ArgumentParser(description='RL') - parser.add_argument( - '--algo', default='ppo', help='algorithm to use: a2c | ppo | acktr') - parser.add_argument( - '--gail', - action='store_true', - default=False, - help='do imitation learning with gail') - parser.add_argument( - '--gail-experts-dir', - default='./gail_experts', - help='directory that contains expert demonstrations for gail') - parser.add_argument( - '--gail-batch-size', - type=int, - default=128, - help='gail batch size (default: 128)') - parser.add_argument( - '--gail-epoch', type=int, default=5, help='gail epochs (default: 5)') - parser.add_argument( - '--lr', type=float, default=2.5e-4, help='learning rate (default: 2.5e-4)') - parser.add_argument( - '--eps', - type=float, - default=1e-5, - help='RMSprop optimizer epsilon (default: 1e-5)') - parser.add_argument( - '--alpha', - type=float, - default=0.99, - help='RMSprop optimizer apha (default: 0.99)') - parser.add_argument( - '--gamma', - type=float, - default=0.99, - help='discount factor for rewards (default: 0.99)') - 
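The `add_ppo_args` helper above is meant to be composed with each example script's own flags, the same way `run_ga.py`, `run_bo.py`, and `run_ppo.py` do later in this diff. A minimal sketch of that wiring, assuming it runs from the `examples` directory so the `ppo` package is importable; the flag values are illustrative:

```python
import argparse

from ppo.args import add_ppo_args

# Script-specific flags live on the parser itself...
parser = argparse.ArgumentParser(description='Arguments for a PPO experiment')
parser.add_argument('--env-name', type=str, default='Walker-v0')

# ...and the shared PPO hyperparameters are attached as an argument group.
add_ppo_args(parser)

args = parser.parse_args(['--learning-rate', '1e-4', '--n-steps', '256'])
print(args.env_name, args.learning_rate, args.n_steps)  # Walker-v0 0.0001 256
```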
parser.add_argument( - '--use-gae', - action='store_true', - default=True, - help='use generalized advantage estimation') - parser.add_argument( - '--gae-lambda', - type=float, - default=0.95, - help='gae lambda parameter (default: 0.95)') - parser.add_argument( - '--entropy-coef', - type=float, - default=0.01, - help='entropy term coefficient (default: 0.01)') - parser.add_argument( - '--value-loss-coef', - type=float, - default=0.5, - help='value loss coefficient (default: 0.5)') - parser.add_argument( - '--max-grad-norm', - type=float, - default=0.5, - help='max norm of gradients (default: 0.5)') - parser.add_argument( - '--seed', type=int, default=1, help='random seed (default: 1)') - parser.add_argument( - '--cuda-deterministic', - action='store_true', - default=False, - help="sets flags for determinism when using CUDA (potentially slow!)") - parser.add_argument( - '--num-processes', - type=int, - default=1, - help='how many training CPU processes to use (default: 1)') - parser.add_argument( - '--num-steps', - type=int, - default=128, - help='number of forward steps in A2C / num steps to use in PPO (default: 128)') - parser.add_argument( - '--ppo-epoch', - type=int, - default=4, - help='number of ppo epochs (default: 4)') - parser.add_argument( - '--num-mini-batch', - type=int, - default=4, - help='number of batches for ppo (default: 4)') - parser.add_argument( - '--clip-param', - type=float, - default=0.1, - help='ppo clip parameter (default: 0.1)') - parser.add_argument( - '--log-interval', - type=int, - default=10, - help='log interval, one log per n updates (default: 10)') - parser.add_argument( - '--save-interval', - type=int, - default=100, - help='save interval, one save per n updates (default: 100)') - parser.add_argument( - '--num-evals', - type=int, - default=1, - help='number of times to evaluate each controller (for evaluation purposes not training). 
(default: 1) as most Evolution Gym environments are deterministic.') - parser.add_argument( - '--eval-interval', - type=int, - default=None, - help='eval interval, one eval per n updates (default: None)') - parser.add_argument( - '--num-env-steps', - type=int, - default=10e6, - help='number of environment steps to train (default: 10e6)') - parser.add_argument( - '--env-name', - default='roboticgamedesign-v0', - help='environment to train on (default: roboticgamedesign-v0)') - parser.add_argument( - '--log-dir', - default='/tmp/gym/', - help='directory to save agent logs (default: /tmp/gym)') - parser.add_argument( - '--save-dir', - default='./trained_models/', - help='directory to save agent logs (default: ./trained_models/)') - parser.add_argument( - '--no-cuda', - action='store_true', - default=False, - help='disables CUDA training') - parser.add_argument( - '--use-proper-time-limits', - action='store_true', - default=False, - help='compute returns taking into account time limits') - parser.add_argument( - '--recurrent-policy', - action='store_true', - default=False, - help='use a recurrent policy') - parser.add_argument( - '--use-linear-lr-decay', - action='store_true', - default=True, - help='use a linear schedule on the learning rate') - args = parser.parse_args() - - args.cuda = not args.no_cuda and torch.cuda.is_available() - - assert args.algo in ['a2c', 'ppo', 'acktr'] - if args.recurrent_policy: - assert args.algo in ['a2c', 'ppo'], \ - 'Recurrent policy is not implemented for ACKTR' - - return args diff --git a/examples/ppo/callback.py b/examples/ppo/callback.py new file mode 100644 index 00000000..a6e6b72b --- /dev/null +++ b/examples/ppo/callback.py @@ -0,0 +1,116 @@ +import os +from typing import List, Optional +import numpy as np +from ppo.eval import eval_policy +from stable_baselines3.common.callbacks import BaseCallback + +class EvalCallback(BaseCallback): + """ + A custom callback that derives from ``BaseCallback``. 
+ + :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages + """ + def __init__( + self, + body: np.ndarray, + env_name: str, + eval_every: int, + n_evals: int, + n_envs: int, + model_save_dir: str, + model_save_name: str, + connections: Optional[np.ndarray] = None, + verbose: int = 0 + ): + super().__init__(verbose) + # Those variables will be accessible in the callback + # (they are defined in the base class) + # The RL model + # self.model = None # type: BaseAlgorithm + # An alias for self.model.get_env(), the environment used for training + # self.training_env # type: VecEnv + # Number of time the callback was called + # self.n_calls = 0 # type: int + # num_timesteps = n_envs * n times env.step() was called + # self.num_timesteps = 0 # type: int + # local and global variables + # self.locals = {} # type: Dict[str, Any] + # self.globals = {} # type: Dict[str, Any] + # The logger object, used to report things in the terminal + # self.logger # type: stable_baselines3.common.logger.Logger + # Sometimes, for event callback, it is useful + # to have access to the parent object + # self.parent = None # type: Optional[BaseCallback] + + self.body = body + self.connections = connections + self.env_name = env_name + self.eval_every = eval_every + self.n_evals = n_evals + self.n_envs = n_envs + self.model_save_dir = model_save_dir + self.model_save_name = model_save_name + + if not os.path.exists(model_save_dir): + os.makedirs(model_save_dir) + + self.best_reward = -float('inf') + + def _on_training_start(self) -> None: + """ + This method is called before the first rollout starts. + """ + pass + + def _on_rollout_start(self) -> None: + """ + A rollout is the collection of environment interaction + using the current policy. + This event is triggered before collecting new samples. + """ + pass + + def _on_step(self) -> bool: + """ + This method will be called by the model after each call to `env.step()`. + + For child callback (of an `EventCallback`), this will be called + when the event is triggered. + + :return: If the callback returns False, training is aborted early. + """ + + if self.num_timesteps % self.eval_every == 0: + self._validate_and_save() + return True + + def _on_rollout_end(self) -> None: + """ + This event is triggered before updating the policy. + """ + pass + + def _on_training_end(self) -> None: + """ + This event is triggered before exiting the `learn()` method. 
+ """ + self._validate_and_save() + + def _validate_and_save(self) -> None: + rewards = eval_policy( + model=self.model, + body=self.body, + connections=self.connections, + env_name=self.env_name, + n_evals=self.n_evals, + n_envs=self.n_envs, + ) + out = f"[{self.model_save_name}] Mean: {np.mean(rewards):.3}, Std: {np.std(rewards):.3}, Min: {np.min(rewards):.3}, Max: {np.max(rewards):.3}" + mean_reward = np.mean(rewards).item() + if mean_reward > self.best_reward: + out += f" NEW BEST ({mean_reward:.3} > {self.best_reward:.3})" + self.best_reward = mean_reward + self.model.save(os.path.join(self.model_save_dir, self.model_save_name)) + if self.verbose > 0: + print(out) + \ No newline at end of file diff --git a/examples/ppo/envs.py b/examples/ppo/envs.py deleted file mode 100644 index ab23424d..00000000 --- a/examples/ppo/envs.py +++ /dev/null @@ -1,258 +0,0 @@ -import os -import gym -import numpy as np -import torch -from gym.spaces.box import Box - -from stable_baselines3.common.monitor import Monitor -from stable_baselines3.common.atari_wrappers import ( - NoopResetEnv, MaxAndSkipEnv, EpisodicLifeEnv, FireResetEnv, WarpFrame, ClipRewardEnv) -from stable_baselines3.common.vec_env import VecEnvWrapper, DummyVecEnv, SubprocVecEnv -from stable_baselines3.common.vec_env.vec_normalize import \ - VecNormalize as VecNormalize_ - -try: - import dm_control2gym -except ImportError: - pass - -try: - import roboschool -except ImportError: - pass - -try: - import pybullet_envs -except ImportError: - pass - -# Derived from -# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail - -def make_env(env_id, robot_structure, seed, rank, log_dir, allow_early_resets): - def _thunk(): - if env_id.startswith("dm"): - _, domain, task = env_id.split('.') - env = dm_control2gym.make(domain_name=domain, task_name=task) - else: - env = gym.make(env_id, body = robot_structure[0], connections = robot_structure[1]) - - is_atari = hasattr(gym.envs, 'atari') and isinstance( - env.unwrapped, gym.envs.atari.atari_env.AtariEnv) - if is_atari: - env = NoopResetEnv(env, noop_max=30) - env = MaxAndSkipEnv(env, skip=4) - - env.seed(seed + rank) - - if str(env.__class__.__name__).find('TimeLimit') >= 0: - env = TimeLimitMask(env) - - if log_dir is not None: - env = Monitor( - env, - os.path.join(log_dir, str(rank)), - allow_early_resets=allow_early_resets) - - if is_atari: - if len(env.observation_space.shape) == 3: - env = EpisodicLifeEnv(env) - if "FIRE" in env.unwrapped.get_action_meanings(): - env = FireResetEnv(env) - env = WarpFrame(env, width=84, height=84) - env = ClipRewardEnv(env) - elif len(env.observation_space.shape) == 3: - #ASK ABOUT THIS - pass - # raise NotImplementedError( - # "CNN models work only for atari,\n" - # "please use a custom wrapper for a custom pixel input env.\n" - # "See wrap_deepmind for an example.") - - # If the input has shape (W,H,3), wrap for PyTorch convolutions - obs_shape = env.observation_space.shape - if len(obs_shape) == 3 and obs_shape[2] in [1, 3]: - env = TransposeImage(env, op=[2, 0, 1]) - - return env - - return _thunk - - -def make_vec_envs(env_name, - robot_structure, - seed, - num_processes, - gamma, - log_dir, - device, - allow_early_resets, - num_frame_stack=None): - envs = [ - make_env(env_name, robot_structure, seed, i, log_dir, allow_early_resets) - for i in range(num_processes) - ] - - if len(envs) > 1: - envs = SubprocVecEnv(envs) - else: - envs = DummyVecEnv(envs) - - if len(envs.observation_space.shape) == 1: - if gamma is None: - envs = VecNormalize(envs, 
norm_reward=False) - else: - envs = VecNormalize(envs, gamma=gamma) - - envs = VecPyTorch(envs, device) - - if num_frame_stack is not None: - envs = VecPyTorchFrameStack(envs, num_frame_stack, device) - elif len(envs.observation_space.shape) == 3: - envs = VecPyTorchFrameStack(envs, 4, device) - - return envs - - -# Checks whether done was caused my timit limits or not -class TimeLimitMask(gym.Wrapper): - def step(self, action): - obs, rew, done, info = self.env.step(action) - if done and self.env._max_episode_steps == self.env._elapsed_steps: - info['bad_transition'] = True - - return obs, rew, done, info - - def reset(self, **kwargs): - return self.env.reset(**kwargs) - - -# Can be used to test recurrent policies for Reacher-v2 -class MaskGoal(gym.ObservationWrapper): - def observation(self, observation): - if self.env._elapsed_steps > 0: - observation[-2:] = 0 - return observation - - -class TransposeObs(gym.ObservationWrapper): - def __init__(self, env=None): - """ - Transpose observation space (base class) - """ - super(TransposeObs, self).__init__(env) - - -class TransposeImage(TransposeObs): - def __init__(self, env=None, op=[2, 0, 1]): - """ - Transpose observation space for images - """ - super(TransposeImage, self).__init__(env) - assert len(op) == 3, "Error: Operation, " + str(op) + ", must be dim3" - self.op = op - obs_shape = self.observation_space.shape - self.observation_space = Box( - self.observation_space.low[0, 0, 0], - self.observation_space.high[0, 0, 0], [ - obs_shape[self.op[0]], obs_shape[self.op[1]], - obs_shape[self.op[2]] - ], - dtype=self.observation_space.dtype) - - def observation(self, ob): - return ob.transpose(self.op[0], self.op[1], self.op[2]) - - -class VecPyTorch(VecEnvWrapper): - def __init__(self, venv, device): - """Return only every `skip`-th frame""" - super(VecPyTorch, self).__init__(venv) - self.device = device - # TODO: Fix data types - - def reset(self): - obs = self.venv.reset() - obs = torch.from_numpy(obs).float().to(self.device) - return obs - - def step_async(self, actions): - if isinstance(actions, torch.LongTensor): - # Squeeze the dimension for discrete actions - actions = actions.squeeze(1) - actions = actions.cpu().numpy() - self.venv.step_async(actions) - - def step_wait(self): - obs, reward, done, info = self.venv.step_wait() - obs = torch.from_numpy(obs).float().to(self.device) - reward = torch.from_numpy(reward).unsqueeze(dim=1).float() - return obs, reward, done, info - - -class VecNormalize(VecNormalize_): - def __init__(self, *args, **kwargs): - super(VecNormalize, self).__init__(*args, **kwargs) - self.training = True - - def _obfilt(self, obs, update=True): - if self.obs_rms: - if self.training and update: - self.obs_rms.update(obs) - obs = np.clip((obs - self.obs_rms.mean) / - np.sqrt(self.obs_rms.var + self.epsilon), - -self.clipob, self.clipob) - return obs - else: - return obs - - def train(self): - self.training = True - - def eval(self): - self.training = False - - -# Derived from -# https://github.com/openai/baselines/blob/master/baselines/common/vec_env/vec_frame_stack.py -class VecPyTorchFrameStack(VecEnvWrapper): - def __init__(self, venv, nstack, device=None): - self.venv = venv - self.nstack = nstack - - wos = venv.observation_space # wrapped ob space - self.shape_dim0 = wos.shape[0] - - low = np.repeat(wos.low, self.nstack, axis=0) - high = np.repeat(wos.high, self.nstack, axis=0) - - if device is None: - device = torch.device('cpu') - self.stacked_obs = torch.zeros((venv.num_envs, ) + - low.shape).to(device) - - 
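For orientation: the `EvalCallback` added earlier in this patch is an ordinary stable-baselines3 `BaseCallback`. Every `eval_every` timesteps it scores the current policy with `eval_policy` and saves the model under `model_save_dir` (as `<model_save_name>.zip`) whenever the mean reward improves. A hedged sketch of using it directly, outside `run_ppo`; the robot, evaluation interval, and save path are placeholders:

```python
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

import evogym.envs  # registers the EvoGym tasks
from evogym import sample_robot
from ppo.callback import EvalCallback

body, connections = sample_robot((5, 5))
env_name = "Walker-v0"

vec_env = make_vec_env(env_name, n_envs=1, env_kwargs={
    "body": body,
    "connections": connections,
})

callback = EvalCallback(
    body=body,
    connections=connections,
    env_name=env_name,
    eval_every=10_000,                                # placeholder interval
    n_evals=1,
    n_envs=1,
    model_save_dir="saved_data/example/controller",   # placeholder directory
    model_save_name=env_name,
    verbose=1,
)

model = PPO("MlpPolicy", vec_env, verbose=1)
model.learn(total_timesteps=50_000, callback=callback)
print(f"Best evaluated reward: {callback.best_reward:.3f}")
```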
observation_space = gym.spaces.Box( - low=low, high=high, dtype=venv.observation_space.dtype) - VecEnvWrapper.__init__(self, venv, observation_space=observation_space) - - def step_wait(self): - obs, rews, news, infos = self.venv.step_wait() - self.stacked_obs[:, :-self.shape_dim0] = \ - self.stacked_obs[:, self.shape_dim0:].clone() - for (i, new) in enumerate(news): - if new: - self.stacked_obs[i] = 0 - self.stacked_obs[:, -self.shape_dim0:] = obs - return self.stacked_obs, rews, news, infos - - def reset(self): - obs = self.venv.reset() - if torch.backends.cudnn.deterministic: - self.stacked_obs = torch.zeros(self.stacked_obs.shape) - else: - self.stacked_obs.zero_() - self.stacked_obs[:, -self.shape_dim0:] = obs - return self.stacked_obs - - def close(self): - self.venv.close() diff --git a/examples/ppo/eval.py b/examples/ppo/eval.py new file mode 100644 index 00000000..3d1f6e2b --- /dev/null +++ b/examples/ppo/eval.py @@ -0,0 +1,66 @@ +from typing import List, Optional +import numpy as np +from stable_baselines3 import PPO +from stable_baselines3.common.env_util import make_vec_env + +def eval_policy( + model: PPO, + body: np.ndarray, + env_name: str, + n_evals: int = 1, + n_envs: int = 1, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + deterministic_policy: bool = False, + seed: int = 42, + verbose: bool = False, +) -> List[float]: + """ + Evaluate the controller for the robot in the environment. + Returns the result of `n_evals` evaluations. + """ + + def run_evals(n: int) -> List[float]: + """ + Run `n` evaluations in parallel. + """ + + # Parallel environments + vec_env = make_vec_env(env_name, n_envs=n, seed=seed, env_kwargs={ + 'body': body, + 'connections': connections, + "render_mode": render_mode, + }) + + # Evaluate + rewards = [] + obs = vec_env.reset() + cum_done = np.array([False]*n) + while not np.all(cum_done): + action, _states = model.predict(obs, deterministic=deterministic_policy) + obs, reward, done, info = vec_env.step(action) + + # Track when environments terminate + if verbose: + for i, (d, cd) in enumerate(zip(done, cum_done)): + if d and not cd: + print(f"Environment {i} terminated after {len(rewards)} steps") + + # Keep track of done environments + cum_done = np.logical_or(cum_done, done) + + # Update rewards -- done environments will not be updated + reward[cum_done] = 0 + rewards.append(reward) + vec_env.close() + + # Sum rewards over time + rewards = np.asarray(rewards) + return np.sum(rewards, axis=0) + + # Run evaluations n_envs at a time + rewards = [] + for i in range(np.ceil(n_evals/n_envs).astype(int)): + rewards.extend(run_evals(min(n_envs, n_evals - i*n_envs))) + + return rewards diff --git a/examples/ppo/evaluate.py b/examples/ppo/evaluate.py deleted file mode 100644 index 7a138da6..00000000 --- a/examples/ppo/evaluate.py +++ /dev/null @@ -1,59 +0,0 @@ -import numpy as np -import torch -from ppo import utils -from ppo.envs import make_vec_envs - -# Derived from -# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail - -def evaluate( - num_evals, - actor_critic, - obs_rms, - env_name, - robot_structure, - seed, - num_processes, - eval_log_dir, - device): - - num_processes = min(num_processes, num_evals) - - eval_envs = make_vec_envs(env_name, robot_structure, seed + num_processes, num_processes, - None, eval_log_dir, device, True) - - vec_norm = utils.get_vec_normalize(eval_envs) - if vec_norm is not None: - vec_norm.eval() - vec_norm.obs_rms = obs_rms - - eval_episode_rewards = [] - - obs = eval_envs.reset() - 
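The new `ppo/eval.py` above replaces the old `evaluate.py`: it rolls out the policy in batches of `n_envs` vectorized environments and returns one cumulative reward per evaluation. A small sketch of scoring a controller produced by the training scripts in this patch; the paths assume the default `test_ppo` experiment written by `run_ppo.py` further down:

```python
import numpy as np
from stable_baselines3 import PPO

import evogym.envs
from ppo.eval import eval_policy

# Structure/controller pair saved by run_ppo.py with its default arguments (placeholder paths).
structure = np.load("saved_data/test_ppo/structure/Walker-v0.npz")
body, connections = structure["arr_0"], structure["arr_1"]
model = PPO.load("saved_data/test_ppo/controller/Walker-v0.zip")

rewards = eval_policy(
    model=model,
    body=body,
    env_name="Walker-v0",
    n_evals=4,
    n_envs=2,                      # evaluations run in batches of n_envs
    connections=connections,
    deterministic_policy=True,
)
print(f"Rewards: {rewards}, mean: {np.mean(rewards):.3f}")
```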
eval_recurrent_hidden_states = torch.zeros( - num_processes, actor_critic.recurrent_hidden_state_size, device=device) - eval_masks = torch.zeros(num_processes, 1, device=device) - - while len(eval_episode_rewards) < num_evals: - with torch.no_grad(): - _, action, _, eval_recurrent_hidden_states = actor_critic.act( - obs, - eval_recurrent_hidden_states, - eval_masks, - deterministic=True) - - # Obser reward and next obs - obs, _, done, infos = eval_envs.step(action) - - eval_masks = torch.tensor( - [[0.0] if done_ else [1.0] for done_ in done], - dtype=torch.float32, - device=device) - - for info in infos: - if 'episode' in info.keys(): - eval_episode_rewards.append(info['episode']['r']) - - eval_envs.close() - - return np.mean(eval_episode_rewards) diff --git a/examples/ppo/group_ppo.py b/examples/ppo/group_ppo.py index 4f815172..899c13bc 100644 --- a/examples/ppo/group_ppo.py +++ b/examples/ppo/group_ppo.py @@ -1,33 +1,24 @@ +import os import numpy as np import json import shutil -import random +import argparse -import sys -import os -curr_dir = os.path.dirname(os.path.abspath(__file__)) -root_dir = os.path.join(curr_dir, '..') -external_dir = os.path.join(root_dir, 'externals') -sys.path.insert(0, root_dir) -sys.path.insert(1, os.path.join(external_dir, 'PyTorch-NEAT')) -sys.path.insert(1, os.path.join(external_dir, 'pytorch_a2c_ppo_acktr_gail')) - -from ppo import run_ppo +from ppo.run import run_ppo +from ppo.args import add_ppo_args import utils.mp_group as mp -from utils.algo_utils import * - +import evogym.envs from evogym import WorldObject class SimJob(): - def __init__(self, name, robots, envs, train_iters): + def __init__(self, name, robots, envs): self.name = name self.robots = robots self.envs = envs - self.train_iters = train_iters def get_data(self,): - return {'robots': self.robots, 'envs': self.envs, 'train_iters': self.train_iters} + return {'robots': self.robots, 'envs': self.envs} class RunData(): @@ -41,14 +32,13 @@ def set_reward(self, reward): self.reward = reward def read_robot_from_file(file_name): - global root_dir possible_paths = [ os.path.join(file_name), os.path.join(f'{file_name}.npz'), os.path.join(f'{file_name}.json'), - os.path.join(root_dir, 'world_data', file_name), - os.path.join(root_dir, 'world_data', f'{file_name}.npz'), - os.path.join(root_dir, 'world_data', f'{file_name}.json'), + os.path.join('world_data', file_name), + os.path.join('world_data', f'{file_name}.npz'), + os.path.join('world_data', f'{file_name}.json'), ] best_path = None @@ -78,14 +68,18 @@ def clean_name(name): return name def run_group_ppo(experiment_name, sim_jobs): + ### ARGS ### + parser = argparse.ArgumentParser(description='Arguments for group PPO script') + add_ppo_args(parser) + args = parser.parse_args() - ### STARTUP: MANAGE DIRECTORIES ### - exp_path = os.path.join(root_dir, "saved_data", experiment_name) + ### MANAGE DIRECTORIES ### + exp_path = os.path.join("saved_data", experiment_name) try: os.makedirs(exp_path) except: print(f'THIS EXPERIMENT ({experiment_name}) ALREADY EXISTS') - print("Override? (y/n): ", end="") + print("Delete and override? 
(y/n): ", end="") ans = input() if ans.lower() == "y": shutil.rmtree(exp_path) @@ -114,8 +108,6 @@ def run_group_ppo(experiment_name, sim_jobs): except: pass - tc = TerminationCondition(job.train_iters) - count = 0 for env_name in job.envs: out[job.name][env_name] = {} @@ -127,9 +119,10 @@ def run_group_ppo(experiment_name, sim_jobs): temp_path = os.path.join(save_path_structure, f'{clean_name(robot_name)}_{env_name}.npz') np.savez(temp_path, structure[0], structure[1]) - - ppo_args = ((structure[0], structure[1]), tc, (save_path_controller, f'{clean_name(robot_name)}_{env_name}'), env_name, True) + + ppo_args = (args, structure[0], env_name, save_path_controller, f'{clean_name(robot_name)}_{env_name}', structure[1]) group.add_job(run_ppo, ppo_args, callback=run_data[-1].set_reward) + group.run_jobs(2) ### SAVE RANKING TO FILE ## diff --git a/examples/ppo/run.py b/examples/ppo/run.py index 5c017744..f054f360 100644 --- a/examples/ppo/run.py +++ b/examples/ppo/run.py @@ -1,242 +1,64 @@ -import os, sys -sys.path.insert(1, os.path.join(sys.path[0], 'externals', 'pytorch_a2c_ppo_acktr_gail')) - +import argparse +from typing import Optional import numpy as np -import time -from collections import deque -import torch - -from ppo import utils -from ppo.arguments import get_args -from ppo.evaluate import evaluate -from ppo.envs import make_vec_envs - -from a2c_ppo_acktr import algo -from a2c_ppo_acktr.algo import gail -from a2c_ppo_acktr.model import Policy -from a2c_ppo_acktr.storage import RolloutStorage +from stable_baselines3 import PPO +from stable_baselines3.common.env_util import make_vec_env -import evogym.envs - -# Derived from -# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail +from ppo.eval import eval_policy +from ppo.callback import EvalCallback def run_ppo( - structure, - termination_condition, - saving_convention, - override_env_name = None, - verbose = True): - - assert (structure == None) == (termination_condition == None) and (structure == None) == (saving_convention == None) - - print(f'Starting training on \n{structure}\nat {saving_convention}...\n') - - args = get_args() - if override_env_name: - args.env_name = override_env_name - - torch.manual_seed(args.seed) - torch.cuda.manual_seed_all(args.seed) - - if args.cuda and torch.cuda.is_available() and args.cuda_deterministic: - torch.backends.cudnn.benchmark = False - torch.backends.cudnn.deterministic = True - - log_dir = args.log_dir - if saving_convention != None: - log_dir = os.path.join(saving_convention[0], log_dir, "robot_" + str(saving_convention[1])) - eval_log_dir = log_dir + "_eval" - utils.cleanup_log_dir(log_dir) - utils.cleanup_log_dir(eval_log_dir) - - torch.set_num_threads(1) - device = torch.device("cuda:0" if args.cuda else "cpu") - - envs = make_vec_envs(args.env_name, structure, args.seed, args.num_processes, - args.gamma, args.log_dir, device, False) - - actor_critic = Policy( - envs.observation_space.shape, - envs.action_space, - base_kwargs={'recurrent': args.recurrent_policy}) - actor_critic.to(device) - - if args.algo == 'a2c': - print('Warning: this code has only been tested with ppo.') - agent = algo.A2C_ACKTR( - actor_critic, - args.value_loss_coef, - args.entropy_coef, - lr=args.lr, - eps=args.eps, - alpha=args.alpha, - max_grad_norm=args.max_grad_norm) - elif args.algo == 'ppo': - agent = algo.PPO( - actor_critic, - args.clip_param, - args.ppo_epoch, - args.num_mini_batch, - args.value_loss_coef, - args.entropy_coef, - lr=args.lr, - eps=args.eps, - max_grad_norm=args.max_grad_norm) - elif 
args.algo == 'acktr': - print('Warning: this code has only been tested with ppo.') - agent = algo.A2C_ACKTR( - actor_critic, args.value_loss_coef, args.entropy_coef, acktr=True) - - if args.gail: - assert len(envs.observation_space.shape) == 1 - discr = gail.Discriminator( - envs.observation_space.shape[0] + envs.action_space.shape[0], 100, - device) - file_name = os.path.join( - args.gail_experts_dir, "trajs_{}.pt".format( - args.env_name.split('-')[0].lower())) - - expert_dataset = gail.ExpertDataset( - file_name, num_trajectories=4, subsample_frequency=20) - drop_last = len(expert_dataset) > args.gail_batch_size - gail_train_loader = torch.utils.data.DataLoader( - dataset=expert_dataset, - batch_size=args.gail_batch_size, - shuffle=True, - drop_last=drop_last) - - rollouts = RolloutStorage(args.num_steps, args.num_processes, - envs.observation_space.shape, envs.action_space, - actor_critic.recurrent_hidden_state_size) - - obs = envs.reset() - rollouts.obs[0].copy_(obs) - rollouts.to(device) - - episode_rewards = deque(maxlen=10) - - start = time.time() - num_updates = int( - args.num_env_steps) // args.num_steps // args.num_processes - - rewards_tracker = [] - avg_rewards_tracker = [] - sliding_window_size = 10 - max_determ_avg_reward = float('-inf') - - for j in range(num_updates): - - if args.use_linear_lr_decay: - # decrease learning rate linearly - utils.update_linear_schedule( - agent.optimizer, j, num_updates, - agent.optimizer.lr if args.algo == "acktr" else args.lr) - - for step in range(args.num_steps): - # Sample actions - with torch.no_grad(): - value, action, action_log_prob, recurrent_hidden_states = actor_critic.act( - rollouts.obs[step], rollouts.recurrent_hidden_states[step], - rollouts.masks[step]) - - # Obser reward and next obs - obs, reward, done, infos = envs.step(action) - - # track rewards - for info in infos: - if 'episode' in info.keys(): - episode_rewards.append(info['episode']['r']) - rewards_tracker.append(info['episode']['r']) - if len(rewards_tracker) < 10: - avg_rewards_tracker.append(np.average(np.array(rewards_tracker))) - else: - avg_rewards_tracker.append(np.average(np.array(rewards_tracker[-10:]))) - - # If done then clean the history of observations. 
- masks = torch.FloatTensor( - [[0.0] if done_ else [1.0] for done_ in done]) - bad_masks = torch.FloatTensor( - [[0.0] if 'bad_transition' in info.keys() else [1.0] - for info in infos]) - rollouts.insert(obs, recurrent_hidden_states, action, - action_log_prob, value, reward, masks, bad_masks) - - with torch.no_grad(): - next_value = actor_critic.get_value( - rollouts.obs[-1], rollouts.recurrent_hidden_states[-1], - rollouts.masks[-1]).detach() - - if args.gail: - if j >= 10: - envs.venv.eval() - - gail_epoch = args.gail_epoch - if j < 10: - gail_epoch = 100 # Warm up - for _ in range(gail_epoch): - discr.update(gail_train_loader, rollouts, - utils.get_vec_normalize(envs)._obfilt) - - for step in range(args.num_steps): - rollouts.rewards[step] = discr.predict_reward( - rollouts.obs[step], rollouts.actions[step], args.gamma, - rollouts.masks[step]) - - rollouts.compute_returns(next_value, args.use_gae, args.gamma, - args.gae_lambda, args.use_proper_time_limits) - - value_loss, action_loss, dist_entropy = agent.update(rollouts) - - rollouts.after_update() - - # print status - if j % args.log_interval == 0 and len(episode_rewards) > 1 and verbose: - total_num_steps = (j + 1) * args.num_processes * args.num_steps - end = time.time() - print( - "Updates {}, num timesteps {}, FPS {} \n Last {} training episodes: mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}\n" - .format(j, total_num_steps, - int(total_num_steps / (end - start)), - len(episode_rewards), np.mean(episode_rewards), - np.median(episode_rewards), np.min(episode_rewards), - np.max(episode_rewards), dist_entropy, value_loss, - action_loss)) - - # evaluate the controller and save it if it does the best so far - if (args.eval_interval is not None and len(episode_rewards) > 1 - and j % args.eval_interval == 0): - - obs_rms = utils.get_vec_normalize(envs).obs_rms - determ_avg_reward = evaluate(args.num_evals, actor_critic, obs_rms, args.env_name, structure, args.seed, - args.num_processes, eval_log_dir, device) - - if verbose: - if saving_convention != None: - print(f'Evaluated {saving_convention[1]} using {args.num_evals} episodes. Mean reward: {np.mean(determ_avg_reward)}\n') - else: - print(f'Evaluated using {args.num_evals} episodes. 
Mean reward: {np.mean(determ_avg_reward)}\n') - - if determ_avg_reward > max_determ_avg_reward: - max_determ_avg_reward = determ_avg_reward - - temp_path = os.path.join(args.save_dir, args.algo, args.env_name + ".pt") - if saving_convention != None: - temp_path = os.path.join(saving_convention[0], "robot_" + str(saving_convention[1]) + "_controller" + ".pt") - - if verbose: - print(f'Saving {temp_path} with avg reward {max_determ_avg_reward}\n') - torch.save([ - actor_critic, - getattr(utils.get_vec_normalize(envs), 'obs_rms', None) - ], temp_path) - - # return upon reaching the termination condition - if not termination_condition == None: - if termination_condition(j): - if verbose: - print(f'{saving_convention} has met termination condition ({j})...terminating...\n') - return max_determ_avg_reward - -#python ppo_main_test.py --env-name "roboticgamedesign-v0" --algo ppo --use-gae --lr 2.5e-4 --clip-param 0.1 --value-loss-coef 0.5 --num-processes 1 --num-steps 128 --num-mini-batch 4 --log-interval 1 --use-linear-lr-decay --entropy-coef 0.01 -#python ppo.py --env-name "roboticgamedesign-v0" --algo ppo --use-gae --lr 2.5e-4 --clip-param 0.1 --value-loss-coef 0.5 --num-processes 8 --num-steps 128 --num-mini-batch 4 --log-interval 1 --use-linear-lr-decay --entropy-coef 0.01 --log-dir "logs/" + args: argparse.Namespace, + body: np.ndarray, + env_name: str, + model_save_dir: str, + model_save_name: str, + connections: Optional[np.ndarray] = None, + seed: int = 42, +) -> float: + """ + Run ppo and return the best reward achieved during evaluation. + """ + + # Parallel environments + vec_env = make_vec_env(env_name, n_envs=1, seed=seed, env_kwargs={ + 'body': body, + 'connections': connections, + }) + + # Eval Callback + callback = EvalCallback( + body=body, + connections=connections, + env_name=env_name, + eval_every=args.eval_interval, + n_evals=args.n_evals, + n_envs=args.n_eval_envs, + model_save_dir=model_save_dir, + model_save_name=model_save_name, + verbose=args.verbose_ppo, + ) + + # Train + model = PPO( + "MlpPolicy", + vec_env, + verbose=args.verbose_ppo, + learning_rate=args.learning_rate, + n_steps=args.n_steps, + batch_size=args.batch_size, + n_epochs=args.n_epochs, + gamma=args.gamma, + gae_lambda=args.gae_lambda, + vf_coef=args.vf_coef, + max_grad_norm=args.max_grad_norm, + ent_coef=args.ent_coef, + clip_range=args.clip_range + ) + model.learn( + total_timesteps=args.total_timesteps, + callback=callback, + log_interval=args.log_interval + ) + + return callback.best_reward \ No newline at end of file diff --git a/examples/ppo/utils.py b/examples/ppo/utils.py deleted file mode 100644 index a023c7bd..00000000 --- a/examples/ppo/utils.py +++ /dev/null @@ -1,69 +0,0 @@ -import glob -import os - -import torch -import torch.nn as nn - -from ppo.envs import VecNormalize - -# Derived from -# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail - -# Get a render function -def get_render_func(venv): - if hasattr(venv, 'envs'): - return venv.envs[0].render - elif hasattr(venv, 'venv'): - return get_render_func(venv.venv) - elif hasattr(venv, 'env'): - return get_render_func(venv.env) - - return None - - -def get_vec_normalize(venv): - if isinstance(venv, VecNormalize): - return venv - elif hasattr(venv, 'venv'): - return get_vec_normalize(venv.venv) - - return None - - -# Necessary for my KFAC implementation. 
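Taken together, the rewritten `run_ppo` above now just builds a vectorized environment for the given body, trains a stable-baselines3 PPO model with the `EvalCallback`, and returns the best reward seen during evaluation. A hedged sketch of calling it programmatically, e.g. from a custom search loop; the timestep budget and save location are placeholders:

```python
import argparse

import evogym.envs
from evogym import sample_robot

from ppo.args import add_ppo_args
from ppo.run import run_ppo

parser = argparse.ArgumentParser(description='Arguments for a PPO run')
add_ppo_args(parser)
# Short illustrative budget; the defaults train for 1e6 timesteps.
args = parser.parse_args(['--total-timesteps', '100000', '--eval-interval', '20000'])

body, connections = sample_robot((5, 5))
best_reward = run_ppo(
    args=args,
    body=body,
    connections=connections,
    env_name="Walker-v0",
    model_save_dir="saved_data/example/controller",   # placeholder
    model_save_name="Walker-v0",
)
print(f"Best reward during training: {best_reward:.3f}")
```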
-class AddBias(nn.Module): - def __init__(self, bias): - super(AddBias, self).__init__() - self._bias = nn.Parameter(bias.unsqueeze(1)) - - def forward(self, x): - if x.dim() == 2: - bias = self._bias.t().view(1, -1) - else: - bias = self._bias.t().view(1, -1, 1, 1) - - return x + bias - - -def update_linear_schedule(optimizer, epoch, total_num_epochs, initial_lr): - """Decreases the learning rate linearly""" - lr = initial_lr - (initial_lr * (epoch / float(total_num_epochs))) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def init(module, weight_init, bias_init, gain=1): - weight_init(module.weight.data, gain=gain) - bias_init(module.bias.data) - return module - - -def cleanup_log_dir(log_dir): - - try: - os.makedirs(log_dir) - except: - pass - # files = glob.glob(os.path.join(log_dir, '*.monitor.csv')) - # for f in files: - # os.remove(f) diff --git a/examples/run_bo.py b/examples/run_bo.py index 030c464e..65be6b79 100644 --- a/examples/run_bo.py +++ b/examples/run_bo.py @@ -1,21 +1,26 @@ import random import numpy as np +import argparse from bo.run import run_bo +from ppo.args import add_ppo_args if __name__ == '__main__': seed = 0 random.seed(seed) np.random.seed(seed) + + parser = argparse.ArgumentParser(description='Arguments for bo script') + parser.add_argument('--exp-name', type=str, default='test_bo', help='Name of the experiment (default: test_bo)') + parser.add_argument('--env-name', type=str, default='Walker-v0', help='Name of the environment (default: Walker-v0)') + parser.add_argument('--pop-size', type=int, default=3, help='Population size (default: 3)') + parser.add_argument('--structure-shape', type=tuple, default=(5,5), help='Shape of the structure (default: (5,5))') + parser.add_argument('--max-evaluations', type=int, default=6, help='Maximum number of robots that will be evaluated (default: 6)') + parser.add_argument('--num-cores', type=int, default=3, help='Number of robots to evaluate simultaneously (default: 3)') + add_ppo_args(parser) + args = parser.parse_args() - best_robot, best_fitness = run_bo( - experiment_name='test_bo', - structure_shape=(5, 5), - pop_size=3, - max_evaluations=6, - train_iters=50, - num_cores=3, - ) + best_robot, best_fitness = run_bo(args) print('Best robot:') print(best_robot) diff --git a/examples/run_cppn_neat.py b/examples/run_cppn_neat.py index 18437c85..d917c789 100644 --- a/examples/run_cppn_neat.py +++ b/examples/run_cppn_neat.py @@ -1,21 +1,26 @@ import random import numpy as np +import argparse from cppn_neat.run import run_cppn_neat +from ppo.args import add_ppo_args if __name__ == '__main__': seed = 0 random.seed(seed) np.random.seed(seed) + + parser = argparse.ArgumentParser(description='Arguments for cppn-neat script') + parser.add_argument('--exp-name', type=str, default='test_cppn', help='Name of the experiment (default: test_cppn)') + parser.add_argument('--env-name', type=str, default='Walker-v0', help='Name of the environment (default: Walker-v0)') + parser.add_argument('--pop-size', type=int, default=3, help='Population size (default: 3)') + parser.add_argument('--structure-shape', type=tuple, default=(5,5), help='Shape of the structure (default: (5,5))') + parser.add_argument('--max-evaluations', type=int, default=6, help='Maximum number of robots that will be evaluated (default: 6)') + parser.add_argument('--num-cores', type=int, default=3, help='Number of robots to evaluate simultaneously (default: 3)') + add_ppo_args(parser) + args = parser.parse_args() - best_robot, best_fitness = 
run_cppn_neat( - experiment_name='test_cppn', - structure_shape=(5, 5), - pop_size=3, - max_evaluations=6, - train_iters=50, - num_cores=3, - ) + best_robot, best_fitness = run_cppn_neat(args) print('Best robot:') print(best_robot) diff --git a/examples/run_ga.py b/examples/run_ga.py index 10e36450..b7116457 100644 --- a/examples/run_ga.py +++ b/examples/run_ga.py @@ -1,18 +1,23 @@ import random import numpy as np +import argparse from ga.run import run_ga +from ppo.args import add_ppo_args if __name__ == "__main__": seed = 0 random.seed(seed) np.random.seed(seed) - run_ga( - pop_size = 3, - structure_shape = (5,5), - experiment_name = "test_ga", - max_evaluations = 6, - train_iters = 50, - num_cores = 3, - ) \ No newline at end of file + parser = argparse.ArgumentParser(description='Arguments for ga script') + parser.add_argument('--exp-name', type=str, default='test_ga', help='Name of the experiment (default: test_ga)') + parser.add_argument('--env-name', type=str, default='Walker-v0', help='Name of the environment (default: Walker-v0)') + parser.add_argument('--pop-size', type=int, default=3, help='Population size (default: 3)') + parser.add_argument('--structure_shape', type=tuple, default=(5,5), help='Shape of the structure (default: (5,5))') + parser.add_argument('--max-evaluations', type=int, default=6, help='Maximum number of robots that will be evaluated (default: 6)') + parser.add_argument('--num-cores', type=int, default=3, help='Number of robots to evaluate simultaneously (default: 3)') + add_ppo_args(parser) + args = parser.parse_args() + + run_ga(args) \ No newline at end of file diff --git a/examples/run_group_ppo.py b/examples/run_group_ppo.py index 87c66e98..d00f8348 100644 --- a/examples/run_group_ppo.py +++ b/examples/run_group_ppo.py @@ -9,13 +9,11 @@ name = 'big', robots = ['speed_bot', 'carry_bot'], envs = ['Walker-v0', 'Carrier-v0'], - train_iters = 50 ), SimJob( name = 'small', robots = ['speed_bot'], envs = ['Walker-v0'], - train_iters = 50 ) ] ) \ No newline at end of file diff --git a/examples/run_ppo.py b/examples/run_ppo.py new file mode 100644 index 00000000..1edb037e --- /dev/null +++ b/examples/run_ppo.py @@ -0,0 +1,79 @@ +import os +import shutil +import json +import argparse +import numpy as np +from stable_baselines3 import PPO +import evogym.envs +from evogym import WorldObject + +from ppo.args import add_ppo_args +from ppo.run import run_ppo +from ppo.eval import eval_policy + +if __name__ == "__main__": + + # Args + parser = argparse.ArgumentParser(description='Arguments for PPO script') + parser.add_argument( + "--env-name", default="Walker-v0", type=str, help="Environment name (default: Walker-v0)" + ) + parser.add_argument( + "--save-dir", default="saved_data", type=str, help="Parent directory in which to save data(default: saved_data)" + ) + parser.add_argument( + "--exp-name", default="test_ppo", type=str, help="Name of experiment. Data saved to (default: test_ppo)" + ) + parser.add_argument( + "--robot-path", default=os.path.join("world_data", "speed_bot.json"), type=str, help="Path to the robot json file (default: world_data/speed_bot.json)" + ) + add_ppo_args(parser) + args = parser.parse_args() + + # Manage dirs + exp_dir = os.path.join(args.save_dir, args.exp_name) + if os.path.exists(exp_dir): + print(f'THIS EXPERIMENT ({args.exp_name}) ALREADY EXISTS') + print("Delete and override? 
(y/n): ", end="") + ans = input() + if ans.lower() != "y": + exit() + shutil.rmtree(exp_dir) + model_save_dir = os.path.join(args.save_dir, args.exp_name, "controller") + structure_save_dir = os.path.join(args.save_dir, args.exp_name, "structure") + save_name = f"{args.env_name}" + + # Get Robot + robot = WorldObject.from_json(args.robot_path) + os.makedirs(structure_save_dir, exist_ok=True) + np.savez(os.path.join(structure_save_dir, save_name), robot.get_structure(), robot.get_connections()) + + # Train + best_reward = run_ppo( + args=args, + body=robot.get_structure(), + connections=robot.get_connections(), + env_name=args.env_name, + model_save_dir=model_save_dir, + model_save_name=save_name, + ) + + # Save result file + with open(os.path.join(args.save_dir, args.exp_name, "ppo_result.json"), "w") as f: + json.dump({ + "best_reward": best_reward, + "env_name": args.env_name, + }, f, indent=4) + + # Evaluate + model = PPO.load(os.path.join(model_save_dir, save_name)) + rewards = eval_policy( + model=model, + body=robot.get_structure(), + connections=robot.get_connections(), + env_name=args.env_name, + n_evals=1, + n_envs=1, + render_mode="human", + ) + print(f"Mean reward: {np.mean(rewards)}") \ No newline at end of file diff --git a/examples/visualize.py b/examples/visualize.py index 57bb51db..ef6b6c86 100644 --- a/examples/visualize.py +++ b/examples/visualize.py @@ -1,23 +1,43 @@ -import os, sys -root_dir = os.path.dirname(os.path.abspath(__file__)) -external_dir = os.path.join(root_dir, 'externals') -sys.path.insert(0, root_dir) -sys.path.insert(1, os.path.join(external_dir, 'PyTorch-NEAT')) -sys.path.insert(1, os.path.join(external_dir, 'pytorch_a2c_ppo_acktr_gail')) +import os import json import argparse -import sys import numpy as np -import torch -import gym +from typing import Optional from utils.algo_utils import * -from ppo.envs import make_vec_envs -from ppo.utils import get_vec_normalize - +from stable_baselines3 import PPO +from stable_baselines3.common.env_util import make_vec_env import evogym.envs +def rollout( + env_name: str, + n_iters: int, + model: PPO, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + seed: int = 42, +): + # Parallel environments + vec_env = make_vec_env(env_name, n_envs=1, seed=seed, env_kwargs={ + 'body': body, + 'connections': connections, + "render_mode": "human", + }) + + # Rollout + reward_sum = 0 + obs = vec_env.reset() + count = 0 + while count < n_iters: + action, _states = model.predict(obs, deterministic=True) + obs, reward, done, info = vec_env.step(action) + reward_sum += reward[0] + count += 1 + if done: + print(f'\nTotal reward: {reward_sum:.5f}\n') + vec_env.close() + def visualize_codesign(args, exp_name): global EXPERIMENT_PARENT_DIR gen_list = os.listdir(os.path.join(EXPERIMENT_PARENT_DIR, exp_name)) @@ -98,63 +118,10 @@ def visualize_codesign(args, exp_name): if num_iters == 0: continue - - env = make_vec_envs( - args.env_name, - structure, - 1000, - 1, - None, - None, - device='cpu', - allow_early_resets=False) - - # We need to use the same statistics for normalization as used in training - try: - save_path_controller = os.path.join(EXPERIMENT_PARENT_DIR, exp_name, "generation_" + str(gen_number), "controller", "robot_" + str(robot_index) + "_controller" + ".pt") - actor_critic, obs_rms = \ - torch.load(save_path_controller, - map_location='cpu') - except: - print(f'\nCould not load robot controller data at {save_path_controller}.\n') - continue - - vec_norm = get_vec_normalize(env) - if vec_norm is not None: - 
vec_norm.eval() - vec_norm.obs_rms = obs_rms - - recurrent_hidden_states = torch.zeros(1, - actor_critic.recurrent_hidden_state_size) - masks = torch.zeros(1, 1) - - obs = env.reset() - env.render('screen') - - total_steps = 0 - reward_sum = 0 - while total_steps < num_iters: - with torch.no_grad(): - value, action, _, recurrent_hidden_states = actor_critic.act( - obs, recurrent_hidden_states, masks, deterministic=args.det) - - - # Obser reward and next obs - obs, reward, done, _ = env.step(action) - masks.fill_(0.0 if (done) else 1.0) - reward_sum += reward - - if done == True: - env.reset() - reward_sum = float(reward_sum.numpy().flatten()[0]) - print(f'\ntotal reward: {round(reward_sum, 5)}\n') - reward_sum = 0 - - env.render('screen') - - total_steps += 1 - env.venv.close() + save_path_controller = os.path.join(EXPERIMENT_PARENT_DIR, exp_name, "generation_" + str(gen_number), "controller", f'{robot_index}.zip') + model = PPO.load(save_path_controller) + rollout(args.env_name, num_iters, model, structure[0], structure[1]) def visualize_group_ppo(args, exp_name): @@ -218,65 +185,45 @@ def visualize_group_ppo(args, exp_name): for key, value in structure_data.items(): structure.append(value) structure = tuple(structure) + + save_path_controller = os.path.join(exp_dir, job, "controller", f"{robot}_{env_name}.zip") + model = PPO.load(save_path_controller) + rollout(env_name, num_iters, model, structure[0], structure[1]) + +def visualize_ppo(args, exp_name): - env = make_vec_envs( - env_name, - structure, - 1000, - 1, - None, - None, - device='cpu', - allow_early_resets=False) - - # We need to use the same statistics for normalization as used in training - try: - save_path_controller = os.path.join(exp_dir, job, "controller", f"robot_{robot}_{env_name}_controller.pt") - actor_critic, obs_rms = \ - torch.load(save_path_controller, - map_location='cpu') - except: - print(f'\nCould not load robot controller data at {save_path_controller}.\n') - continue - - vec_norm = get_vec_normalize(env) - if vec_norm is not None: - vec_norm.eval() - vec_norm.obs_rms = obs_rms - - recurrent_hidden_states = torch.zeros(1, - actor_critic.recurrent_hidden_state_size) - masks = torch.zeros(1, 1) - - obs = env.reset() - env.render('screen') - - total_steps = 0 - reward_sum = 0 - while total_steps < num_iters: - with torch.no_grad(): - value, action, _, recurrent_hidden_states = actor_critic.act( - obs, recurrent_hidden_states, masks, deterministic=args.det) - - - # Obser reward and next obs - obs, reward, done, _ = env.step(action) - masks.fill_(0.0 if (done) else 1.0) - reward_sum += reward + exp_dir = os.path.join(EXPERIMENT_PARENT_DIR, exp_name) + out_file = os.path.join(exp_dir, 'ppo_result.json') + out = {} + with open(out_file, 'r') as f: + out = json.load(f) + + reward = out['best_reward'] + env_name = out['env_name'] + + print(f'\nEnvironment: {env_name}\nReward: {reward}') - if done == True: - env.reset() - reward_sum = float(reward_sum.numpy().flatten()[0]) - print(f'\ntotal reward: {round(reward_sum, 5)}\n') - reward_sum = 0 + while True: + print() + print("Enter num iters: ", end="") + num_iters = int(input()) + print() - env.render('screen') + if num_iters == 0: + continue - total_steps += 1 + save_path_structure = os.path.join(exp_dir, "structure", f"{env_name}.npz") + structure_data = np.load(save_path_structure) + structure = [] + for key, value in structure_data.items(): + structure.append(value) + structure = tuple(structure) - env.venv.close() + save_path_controller = os.path.join(exp_dir, 
"controller", f"{env_name}.zip") + model = PPO.load(save_path_controller) + rollout(env_name, num_iters, model, structure[0], structure[1]) -EXPERIMENT_PARENT_DIR = os.path.join(root_dir, 'saved_data') +EXPERIMENT_PARENT_DIR = os.path.join('saved_data') if __name__ == "__main__": parser = argparse.ArgumentParser(description='RL') @@ -301,9 +248,10 @@ def visualize_group_ppo(args, exp_name): exp_name = input() files_in_exp_dir = os.listdir(os.path.join(EXPERIMENT_PARENT_DIR, exp_name)) - # group ppo experiment - if 'output.json' in files_in_exp_dir: + + if 'output.json' in files_in_exp_dir: # group ppo experiment visualize_group_ppo(args, exp_name) - # codesign experiment - else: + elif 'ppo_result.json' in files_in_exp_dir: # ppo experiment + visualize_ppo(args, exp_name) + else: # codesign experiment visualize_codesign(args, exp_name) \ No newline at end of file diff --git a/images/design-tool.gif b/images/design-tool.gif new file mode 100644 index 00000000..e086cac0 Binary files /dev/null and b/images/design-tool.gif differ diff --git a/pyproject.toml b/pyproject.toml index d7036f80..541b310a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,54 @@ [build-system] -requires = ["setuptools", "wheel", "pybind11>=2.6.0"] -build-backend = "setuptools.build_meta" \ No newline at end of file +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" # Setuptools used because compiling C++ code requires CMake + +# Manually specify packages and their directories +[tool.setuptools.package-dir] +evogym = "evogym" +"evogym.envs" = "evogym/envs" +"evogym.simulator_cpp" = "evogym/simulator" # C++ simulator package renamed + +# Manually specify package data +[tool.setuptools.package-data] +"evogym.envs" = ["sim_files/*.json"] + +[tool.cibuildwheel] +# Skip 32-bit builds, and musllinux builds +skip = ["*-win32", "*-manylinux_i686", "*-musllinux*"] + +[tool.cibuildwheel.linux] + +manylinux-x86_64-image = "manylinux_2_28" +manylinux-pypy_x86_64-image = "manylinux_2_28" +before-all = "yum install -y libXrandr-devel libXinerama-devel libXcursor-devel libXi-devel mesa-libGLU-devel" +test-command = "echo 'installed'" + +[[tool.cibuildwheel.overrides]] +select = "*-musllinux*" +before-all = "apk add xorg-dev libglu1-mesa-dev libglew-dev xvfb" # these package names may be wrong, untested + +[project] +name = "evogym" +version = "2.0.0" +authors = [ + { name="Jagdeep Singh Bhatia", email="jagdeep@mit.edu" }, +] +description = "Evolution Gym: A benchmark for developing and evaluating soft robot co-design algorithms." 
+readme = "README.md" +license = {file = "LICENSE"} +keywords = ["evolution", "gym", "evolution gym", "soft robotics", "benchmark", "co-design"] +requires-python = ">=3.7, <3.11" +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", +] +dependencies = [ + "gymnasium", + "numpy<2.0.0", +] + +[project.urls] +Homepage = "https://github.com/EvolutionGym/evogym" +Issues = "https://github.com/EvolutionGym/evogym/issues" +Documentation = "https://evolutiongym.github.io/" \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..8883f14a --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,2 @@ +pytest +pytest-xdist \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 1d687450..0d11f04c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,18 +1,5 @@ -glfw==2.5.0 -GPy==1.10.0 -GPyOpt @ git+https://github.com/yunshengtian/GPyOpt@5fc1188ffdefea9a3bc7964a9414d4922603e904 -gym==0.22.0 -h5py==3.6.0 -imageio==2.14.1 -matplotlib==3.5.1 neat-python @ git+https://github.com/yunshengtian/neat-python@2762ab630838520ca6c03a866e8a158f592b0370 -numpy==1.21.5 -opencv-python==4.5.5.62 -Pillow==9.0.0 -pybind11==2.9.0 -pygifsicle==1.0.5 -PyOpenGL==3.1.5 -PyOpenGL-accelerate==3.1.5 -torch==1.10.2 -ttkbootstrap==1.5.1 -typing==3.7.4.3 +GPyOpt @ git+https://github.com/yunshengtian/GPyOpt@5fc1188ffdefea9a3bc7964a9414d4922603e904 +stable-baselines3 +imageio +pygifsicle \ No newline at end of file diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 2bcfaeaa..00000000 --- a/setup.cfg +++ /dev/null @@ -1,19 +0,0 @@ -[metadata] -name = evogym -version = 1.0.0 -author = Jagdeep Bhatia -author_email = jagdeep@mit.edu -description = Evolution Gym: A benchmark for developing and comparing soft robot co-design algorithms. 
-long_description = file: README.md -long_description_content_type = text/markdown -url = https://github.com/EvolutionGym -project_urls = - Bug Tracker = https://github.com/EvolutionGym - Docs = http://evogym.csail.mit.edu/ -classifiers = - Programming Language :: Python :: 3 - License :: OSI Approved :: MIT License - Operating System :: OS Independent - -[options] -python_requires = >=3.6 \ No newline at end of file diff --git a/setup.py b/setup.py index 2e096fa5..4d19b549 100644 --- a/setup.py +++ b/setup.py @@ -57,16 +57,8 @@ def build_extension(self, ext): subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env) subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) - +# Install c++ package via cmake setup( - name="evogym", - packages=['evogym', 'evogym.envs'], - package_dir={ - 'evogym': 'evogym', - 'evogym.envs': 'evogym/envs'}, - package_data={ - "evogym.envs": [os.path.join('sim_files', '*.json')] #["*.json", "*.sob"], - }, ext_modules=[CMakeExtension('evogym.simulator_cpp', 'evogym/simulator')], cmdclass=dict(build_ext=CMakeBuild), zip_safe=False, diff --git a/tests/pytest.ini b/tests/pytest.ini new file mode 100644 index 00000000..5da2abb9 --- /dev/null +++ b/tests/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +markers = + lite: mark as part of the lite test suite \ No newline at end of file diff --git a/tests/requires_screen/test_screen_render_modes.py b/tests/requires_screen/test_screen_render_modes.py new file mode 100644 index 00000000..56583340 --- /dev/null +++ b/tests/requires_screen/test_screen_render_modes.py @@ -0,0 +1,78 @@ +import gymnasium as gym +import pytest +import warnings +import numpy as np +from itertools import product + +import evogym.envs +from evogym import sample_robot + +LITE_TEST_ENV_NAMES = [ + "Pusher-v0", + "Walker-v0", + "Traverser-v0", +] + +@pytest.mark.lite +@pytest.mark.parametrize( + "render_mode, add_options", + list(product( + ["human", "screen"], + [True, False], + )) +) +def test_render_modes(render_mode, add_options): + """ + - Env can render to screen + """ + + body, _ = sample_robot((5, 5)) + if add_options: + env = gym.make("Walker-v0", body=body, render_mode=render_mode, render_options={ + "verbose": False, + "hide_background": False, + "hide_grid": False, + "hide_edges": False, + "hide_voxels": False + }) + else: + env = gym.make("Walker-v0", body=body, render_mode=render_mode) + + # Reset + obs, info = env.reset(seed=None, options=None) + + for i in range(10): + + # Step -- we don't need to render explicitly + action = env.action_space.sample() - 1 + ob, reward, terminated, truncated, info = env.step(action) + + env.close() + +def get_all_env_render_params(): + return [ + env_name if env_name not in LITE_TEST_ENV_NAMES + else pytest.param(env_name, marks=pytest.mark.lite) + for env_name in evogym.BASELINE_ENV_NAMES + ] + +@pytest.mark.parametrize("env_name", get_all_env_render_params()) +def test_all_env_render(env_name): + """ + - Env can render to screen + """ + + body, _ = sample_robot((5, 5)) + env = gym.make(env_name, body=body, render_mode="human") + + # Reset + obs, info = env.reset(seed=None, options=None) + + for i in range(10): + + # Step -- we don't need to render explicitly + action = env.action_space.sample() - 1 + ob, reward, terminated, truncated, info = env.step(action) + + env.close() + \ No newline at end of file diff --git a/tests/screen_free/test_baseline_envs.py b/tests/screen_free/test_baseline_envs.py new file mode 100644 index 00000000..1550a19a --- 
/dev/null +++ b/tests/screen_free/test_baseline_envs.py @@ -0,0 +1,62 @@ +import gymnasium as gym +import pytest +import warnings + +import evogym.envs +from evogym import sample_robot + +LITE_TEST_ENV_NAMES = [ + "Pusher-v0", + "Walker-v0", + "Traverser-v0", +] + +def get_params(): + return [ + env_name if env_name not in LITE_TEST_ENV_NAMES + else pytest.param(env_name, marks=pytest.mark.lite) + for env_name in evogym.BASELINE_ENV_NAMES + ] + +@pytest.mark.parametrize("env_name", get_params()) +def test_env_creatable_and_has_correct_api(env_name): + """ + - Env is creatable + - Env steps for the correct number of steps + - Env follows the gym API + """ + + body, _ = sample_robot((5, 5)) + env = gym.make(env_name, body=body) + + target_steps = env.spec.max_episode_steps + assert isinstance(target_steps, int), f"Env {env_name} does not have a max_episode_steps attribute" + + # Reset + obs, info = env.reset(seed=None, options=None) + + # Rollout with random actions + n_steps = 0 + while True: + action = env.action_space.sample() - 1 + ob, reward, terminated, truncated, info = env.step(action) + + n_steps += 1 + + if terminated or truncated: + env.reset(seed=None, options=None) + break + + if n_steps > target_steps: + break + + # Make sure we can still step after resetting + env.step(env.action_space.sample() - 1) + + # Check that the env terminated after the correct number of steps + assert n_steps <= target_steps, f"Env {env_name} terminated after {n_steps} steps, expected at most {target_steps}" + if n_steps < target_steps: + warnings.warn(f"Env {env_name} terminated early after {n_steps} steps, expected {target_steps}") + + env.close() + \ No newline at end of file diff --git a/tests/screen_free/test_img_render_modes.py b/tests/screen_free/test_img_render_modes.py new file mode 100644 index 00000000..b42e1e4e --- /dev/null +++ b/tests/screen_free/test_img_render_modes.py @@ -0,0 +1,58 @@ +import gymnasium as gym +import pytest +import warnings +import numpy as np +from itertools import product + +import evogym.envs +from evogym import sample_robot + +LITE_TEST_ENV_NAMES = [ + "Pusher-v0", + "Walker-v0", + "Traverser-v0", +] + +def get_params(): + params = product( + evogym.BASELINE_ENV_NAMES, + [None, "img", "rgb_array"], + ) + return [ + param if param[0] not in LITE_TEST_ENV_NAMES + else pytest.param(*param, marks=pytest.mark.lite) + for param in params + ] + + +@pytest.mark.parametrize("env_name, render_mode", get_params()) +def test_render(env_name, render_mode): + """ + - Env can render to none and to image + """ + + body, _ = sample_robot((5, 5)) + env = gym.make(env_name, body=body, render_mode=render_mode) + + # Reset + obs, info = env.reset(seed=None, options=None) + + for i in range(10): + + # Render + result = env.render() + + if render_mode is None: + # Result should be None + assert result is None, f"Env returned {type(result)} instead of None" + else: + # Check img + assert isinstance(result, np.ndarray), f"Env returned {type(result)} instead of np.ndarray" + x, y, c = result.shape + assert c == 3, f"Env returned image with {c} channels, expected 3" + + # Step + action = env.action_space.sample() - 1 + ob, reward, terminated, truncated, info = env.step(action) + + env.close() \ No newline at end of file diff --git a/tests/screen_free/test_utils.py b/tests/screen_free/test_utils.py new file mode 100644 index 00000000..481d0963 --- /dev/null +++ b/tests/screen_free/test_utils.py @@ -0,0 +1,214 @@ +import numpy as np +import pytest +from pytest import raises +from typing 
import List, Tuple + +from evogym.utils import ( + VOXEL_TYPES, + get_uniform, draw, sample_robot, + is_connected, has_actuator, get_full_connectivity +) + +@pytest.mark.lite +def test_get_uniform(): + ones = get_uniform(1) + assert np.allclose(ones, np.ones(1)), ( + f"Expected {np.ones(1)}, got {ones}" + ) + + one_thirds = get_uniform(3) + assert np.allclose(one_thirds, np.ones(3) / 3), ( + f"Expected {np.ones(3) / 3}, got {one_thirds}" + ) + +@pytest.mark.lite +def test_draw(): + result = draw([0.2]) + assert result == 0, f"Expected 0, got {result}" + + result = draw([0.2, 0]) + assert result == 0, f"Expected 0, got {result}" + + result = draw([0, 15]) + assert result == 1, f"Expected 1, got {result}" + + pd = np.zeros(10) + pd[5] = 1 + result = draw(pd) + assert result == 5, f"Expected 5, got {result}" + + pd = np.ones(10) + for i in range(10): + result = draw(pd) + assert result in list(range(10)), f"Expected result to be between 0 and 9, got {result}" + +@pytest.mark.lite +def test_has_actuator(): + h_act, v_act = VOXEL_TYPES['H_ACT'], VOXEL_TYPES['V_ACT'] + others = [ + i for i in VOXEL_TYPES.values() if i not in [h_act, v_act] + ] + + robot = np.zeros((1, 1)) + robot[:, :] = others[0] + assert not has_actuator(robot), "Expected no actuator" + + robot[:, :] = h_act + assert has_actuator(robot), "Expected actuator" + + robot[:, :] = v_act + assert has_actuator(robot), "Expected actuator" + + robot = np.random.choice(others, (10, 10), replace=True) + assert not has_actuator(robot), "Expected no actuator" + + robot[5, 5] = h_act + assert has_actuator(robot), "Expected actuator" + + robot[5, 5] = v_act + assert has_actuator(robot), "Expected actuator" + + robot[1, 1] = h_act + assert has_actuator(robot), "Expected actuator" + + robot = np.random.choice([h_act, v_act], (10, 10), replace=True) + assert has_actuator(robot), "Expected actuator" + +def test_is_connected(): + empty = VOXEL_TYPES['EMPTY'] + others = [ + i for i in VOXEL_TYPES.values() if i != empty + ] + + robot = np.zeros((1, 1)) + robot[:, :] = empty + assert not is_connected(robot), "Expected not connected" + + for val in others: + robot[:, :] = val + assert is_connected(robot), "Expected connected" + + robot = np.array([[others[0]], [empty], [others[1]]]) + assert not is_connected(robot), "Expected not connected" + assert not is_connected(robot.T), "Expected not connected" + + robot = np.array([ + [others[0], empty, others[0]], + [others[1], empty, others[3]], + [others[2], others[1], others[0]] + ]) + assert is_connected(robot), "Expected connected" + assert is_connected(robot.T), "Expected connected" + + robot = np.array([ + [empty, empty, empty], + [empty, others[2], empty], + [empty, empty, empty] + ]) + assert is_connected(robot), "Expected connected" + + robot = np.array([ + [others[0], others[1], empty], + [others[1], empty, others[1]], + [empty, others[1], others[0]] + ]) + assert not is_connected(robot), "Expected not connected" + +@pytest.mark.lite +def test_get_full_connectivity(): + empty = VOXEL_TYPES['EMPTY'] + others = [ + i for i in VOXEL_TYPES.values() if i != empty + ] + + robot = np.zeros((1, 1)) + robot[:, :] = empty + assert get_full_connectivity(robot).shape[1] == 0, "Expected no connections" + assert get_full_connectivity(robot).shape[0] == 2, "Expected 2" + + robot[:, :] = others[0] + assert get_full_connectivity(robot).shape[1] == 0, "Expected no connections" + assert get_full_connectivity(robot).shape[0] == 2, "Expected 2" + + robot = np.array([[others[0], empty, others[0]]]) + connections = 
get_full_connectivity(robot) + assert connections.shape[1] == 0, "Expected no connections" + + def connections_contains_all(connections: np.ndarray, expected: List[Tuple[int, int]]): + connections_as_tuples = [ + (c[0], c[1]) for c in connections.T + ] + for i, j in expected: + if (i, j) not in connections_as_tuples and (j, i) not in connections_as_tuples: + return False + return True + + robot = np.array([ + [others[0], empty, others[0]], + [others[1], empty, others[1]], + ]) + connections = get_full_connectivity(robot) + assert connections.shape[1] == 2, "Expected 2 connections" + assert connections.shape[0] == 2, "Expected 2" + assert connections_contains_all(connections, [(0, 3), (2, 5)]), "Expected connections (0, 3) and (2, 5)" + + + robot = np.array([ + [others[0], others[2], empty], + [empty, others[3], others[1]], + ]) + connections = get_full_connectivity(robot) + assert connections.shape[1] == 3, "Expected 3 connections" + assert connections.shape[0] == 2, "Expected 2" + assert connections_contains_all(connections, [(0, 1), (1, 4), (4, 5)]), "Expected connections (0, 1), (1, 4), and (4, 5)" + +@pytest.mark.lite +def test_sample_robot(): + + h_act, v_act, empty = VOXEL_TYPES['H_ACT'], VOXEL_TYPES['V_ACT'], VOXEL_TYPES['EMPTY'] + + bad_pd = np.ones(5) + bad_pd[h_act] = 0 + bad_pd[v_act] = 0 + with raises(Exception): + sample_robot((5,5), bad_pd) + + bad_pd = np.zeros(5) + bad_pd[empty] = 1 + with raises(Exception): + sample_robot((5,5), bad_pd) + + def check_robot(robot: np.ndarray, connections: np.ndarray): + assert robot.shape == (5, 5), f"Expected shape (5, 5), got {robot.shape}" + assert is_connected(robot), "Expected robot to be connected" + assert has_actuator(robot), "Expected robot to have an actuator" + assert np.allclose(get_full_connectivity(robot), connections), "Expected connections to be the same" + + robot, connections = sample_robot((5, 5)) + check_robot(robot, connections) + + pd = np.ones(5) + pd[h_act] = 0 + robot, connections = sample_robot((5, 5), pd=pd) + check_robot(robot, connections) + + pd = np.ones(5) + pd[v_act] = 0 + robot, connections = sample_robot((5, 5), pd=pd) + check_robot(robot, connections) + + pd = np.ones(5) + pd[empty] = 0 + robot, connections = sample_robot((5, 5), pd=pd) + check_robot(robot, connections) + + pd = np.zeros(5) + pd[v_act] = 1 + robot, connections = sample_robot((5, 5), pd=pd) + check_robot(robot, connections) + + pd = np.zeros(5) + pd[h_act] = 1 + robot, connections = sample_robot((5, 5), pd=pd) + check_robot(robot, connections) + \ No newline at end of file diff --git a/tests/test_render.py b/tests/test_render.py deleted file mode 100644 index f0d26cab..00000000 --- a/tests/test_render.py +++ /dev/null @@ -1,22 +0,0 @@ -from unittest import TestCase - -import evogym.envs -import gym -from evogym import sample_robot - - -class RenderTest(TestCase): - def test_it(self): - body, connections = sample_robot((5, 5)) - env = gym.make("Walker-v0", body=body) - env.reset() - - for _ in range(100): - action = env.action_space.sample() - 1 - ob, reward, done, info = env.step(action) - env.render() - - if done: - env.reset() - - env.close() diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 00000000..53b1b913 --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,5 @@ +LITE_TEST_ENV_NAMES = [ + "Pusher-v0", + "Walker-v0", + "Traverser-v0", +] \ No newline at end of file diff --git a/tutorials/README.md b/tutorials/README.md index 878b7cc0..41f03848 100644 --- a/tutorials/README.md +++ b/tutorials/README.md @@ -1,3 +1,25 @@ # Tutorials -This folder contains completed code for all tutorials on our
[website](https://evolutiongym.github.io/tutorials). \ No newline at end of file +This folder contains completed code for all tutorials on our [website](https://evolutiongym.github.io/tutorials). + +## Custom Environment + +To see an example of custom environment creation, see `envs/simple_env.py`. This environment is registered in `envs/__init__.py`, and can be visualized by running `python visualize_simple_env.py` from this directory. + +## EvoGym Simulator API + +See `basic_api.py` for a simple example of how to create, step, and render an EvoGym simulator with objects of your choice. EvoGym can be used to simulate any number of objects and robots (although simulation speed may slow with many objects). + +To understand the different rendering options available in EvoGym, see `rendering_options.py`. +You can run: + +```bash +python rendering_options.py --render-option to-debug-screen +``` + +| Option | Description | +|--------------------|------------------------------------------------------------------------| +| to-debug-screen | Render to EvoGym's default viewer | +| to-numpy-array | Render to a numpy array (visualized with OpenCV) | +| special-options | Render with special flags (for pretty visualization) | +| very-fast | Render without fps limit | diff --git a/tutorials/envs/__init__.py b/tutorials/envs/__init__.py index 56719962..dd84f967 100644 --- a/tutorials/envs/__init__.py +++ b/tutorials/envs/__init__.py @@ -1,7 +1,7 @@ # import envs and necessary gym packages from envs.simple_env import SimpleWalkerEnvClass -from gym.envs.registration import register +from gymnasium.envs.registration import register # register the env using gym's interface register( diff --git a/tutorials/envs/simple_env.py b/tutorials/envs/simple_env.py index 85891069..51cc8f1f 100644 --- a/tutorials/envs/simple_env.py +++ b/tutorials/envs/simple_env.py @@ -1,27 +1,34 @@ -from gym import spaces +from gymnasium import spaces from evogym import EvoWorld from evogym.envs import EvoGymBase +from typing import Optional, Dict, Any, Tuple import numpy as np import os class SimpleWalkerEnvClass(EvoGymBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join('world_data', 'simple_walker_env.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - EvoGymBase.__init__(self, self.world) + EvoGymBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size - obs_size = self.reset().size + obs_size = self.reset()[0].size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(obs_size,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(obs_size,), dtype=float) # set viewer to track objects self.default_viewer.track_objects('robot') @@ -58,12 +65,12 @@ def step(self, action): self.get_relative_pos_obs("robot"), )) - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions,
truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( @@ -71,4 +78,4 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs \ No newline at end of file + return obs, {} \ No newline at end of file diff --git a/tutorials/rendering_options.py b/tutorials/rendering_options.py index 682a92dd..34c533f2 100644 --- a/tutorials/rendering_options.py +++ b/tutorials/rendering_options.py @@ -2,6 +2,7 @@ import os import numpy as np import cv2 +import argparse ### CREATE A SIMPLE ENVIRONMENT ### @@ -27,13 +28,18 @@ ### SELECT A RENDERING OPTION ### -options = ['to-debug-screen', 'to-numpy-array', 'special-options', 'very-fast'] -option = options[0] - -print(f'\nUsing rendering option {option}...\n') +parser = argparse.ArgumentParser() +parser.add_argument( + '--render-option', + choices=['to-debug-screen', 'to-numpy-array', 'special-options', 'very-fast'], + help='Select a rendering option from: to-debug-screen, to-numpy-array, special-options, very-fast', + default='to-debug-screen', +) +args = parser.parse_args() +print(f'\nUsing rendering option {args.render_option}...\n') # if the 'very-fast' option is chosen, set the rendering speed to be unlimited -if option == 'very-fast': +if args.render_option == 'very-fast': viewer.set_target_rps(None) for i in range(1000): @@ -48,19 +54,19 @@ sim.step() # step and render to a debug screen - if option == 'to-debug-screen': + if args.render_option == 'to-debug-screen': viewer.render('screen') # step and render to a numpy array # use open cv to visualize output - if option == 'to-numpy-array': + if args.render_option == 'to-numpy-array': img = viewer.render('img') img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) cv2.waitKey(1) cv2.imshow("Open CV Window", img) # rendering with more options - if option == 'special-options': + if args.render_option == 'special-options': img = viewer.render( 'screen', verbose = True, @@ -70,7 +76,8 @@ hide_voxels = False) # rendering as fast as possible - if option == 'very-fast': + if args.render_option == 'very-fast': viewer.render('screen', verbose=True) cv2.destroyAllWindows() +viewer.close() diff --git a/tutorials/visualize_simple_env.py b/tutorials/visualize_simple_env.py index 5ecc77e7..d74cde3e 100644 --- a/tutorials/visualize_simple_env.py +++ b/tutorials/visualize_simple_env.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym from evogym import sample_robot # import envs from the envs folder and register them @@ -10,17 +10,21 @@ body, connections = sample_robot((5,5)) # make the SimpleWalkingEnv using gym.make and with the robot information - env = gym.make('SimpleWalkingEnv-v0', body=body) + env = gym.make( + 'SimpleWalkingEnv-v0', + body=body, + render_mode='human', + render_options={'verbose': True} + ) env.reset() # step the environment for 500 iterations for i in range(500): action = env.action_space.sample() - ob, reward, done, info = env.step(action) - env.render(verbose=True) + ob, reward, terminated, truncated, info = env.step(action) - if done: + if terminated or truncated: env.reset() env.close()
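The tutorials README above points readers to `basic_api.py` for the low-level simulator workflow, but that file is not part of this diff. Below is a minimal sketch of that loop, pieced together from calls that do appear in the patch (`EvoWorld.from_json`, `add_from_array`, `track_objects`, `sim.step`, `viewer.render`, `viewer.close`); the `EvoSim`/`EvoViewer` constructors, `sim.reset`, and `set_action` are assumptions about the evogym API and are marked as such, so treat `basic_api.py` as the authoritative reference.

```python
import os

import numpy as np
from evogym import EvoWorld, EvoSim, EvoViewer, sample_robot  # EvoSim/EvoViewer assumed
from evogym.utils import VOXEL_TYPES

# Build a world from one of the tutorial JSON files and drop a random robot into it
# (run from the tutorials/ folder so the relative path resolves).
world = EvoWorld.from_json(os.path.join('world_data', 'simple_walker_env.json'))
body, connections = sample_robot((5, 5))
world.add_from_array('robot', body, 1, 1, connections=connections)

# Create the simulator and a viewer that follows the robot.
sim = EvoSim(world)      # assumed constructor
sim.reset()              # assumed
viewer = EvoViewer(sim)  # assumed constructor
viewer.track_objects('robot')

# One actuation value per actuator voxel, in the 0.6-1.6 range used by the tutorial env.
num_actuators = int(np.sum((body == VOXEL_TYPES['H_ACT']) | (body == VOXEL_TYPES['V_ACT'])))

for _ in range(200):
    action = np.random.uniform(0.6, 1.6, size=num_actuators)
    sim.set_action('robot', action)  # assumed setter
    sim.step()
    viewer.render('screen')

viewer.close()
```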
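A usage note on the new test layout (a sketch of the intended workflow, not text from the patch): the `lite` marker is declared in `tests/pytest.ini`, and `pytest-xdist` from `requirements-dev.txt` supplies `-n auto`. With `evogym` installed, run `pip install -r requirements-dev.txt` and then, from the `tests/` directory, `pytest -n auto -m lite screen_free` to exercise the quick headless subset; the tests under `requires_screen/` additionally need a real or virtual display (for example, wrapping the command with `xvfb-run` on headless Linux).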