Remove OpenAI Gym (gym) from dependencies and source code (#220)

* Remove gym from skrl.utils
* Remove gym from skrl.resources
* Remove gym from skrl.multi_agents
* Remove gym from skrl.models
* Remove gym from skrl.memories
* Remove gym from skrl.envs
* Remove gym from skrl.agents
* Update dependencies
* Remove gym from docs
* Update CHANGELOG
Toni-SM · Nov 2, 2024 · 7dc161e · 7dc161e
1 parent 934fbaa
commit 7dc161e
Show file tree

Hide file tree

Showing 76 changed files with 339 additions and 397 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - Use spaces utilities to process states, observations and actions for all the library components
 - Update model instantiators definitions to process supported fundamental and composite Gymnasium spaces
 - Make flattened tensor storage in memory the default option (revert the change introduced in version 1.3.0)
+- Drop support for PyTorch versions prior to 1.10 (the previous supported version was 1.9).
+
+### Removed
+- Remove OpenAI Gym (`gym`) from dependencies and source code. **skrl** continues to support gym environments,
+  but `gym` is no longer installed as part of the library. If it is needed, it must be installed manually.
+  Any gym-based environment wrapper must use the `convert_gym_space` space utility to operate.
 
 ## [1.3.0] - 2024-09-11
 ### Added
@@ -95,7 +101,7 @@ Summary of the most relevant features:
     - `from skrl.envs.loaders.jax import load_omniverse_isaacgym_env`
 
 ### Changed
-- Drop support for versions prior to PyTorch 1.9 (1.8.0 and 1.8.1)
+- Drop support for PyTorch versions prior to 1.9 (the previous supported version was 1.8)
 
 ## [1.0.0-rc.1] - 2023-07-25
 ### Added
@@ -188,7 +194,7 @@ to allow storing samples in memories during evaluation
 - Parameter `role` to model methods
 - Wrapper compatibility with the new OpenAI Gym environment API
 - Internal library colored logger
-- Migrate checkpoints/models from other RL libraries to skrl models/agents
+- Migrate checkpoints/models from other RL libraries to **skrl** models/agents
 - Configuration parameter `store_separately` to agent configuration dict
 - Save/load agent modules (models, optimizers, preprocessors)
 - Set random seed and configure deterministic behavior for reproducibility

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -54,7 +54,7 @@ Read the code a little bit and you will understand it at first glance... Also
   ```ini
   function annotation (e.g. typing)
   # insert an empty line
-  python libraries and other libraries (e.g. gym, numpy, time, etc.)
+  python libraries and other libraries (e.g. gymnasium, numpy, time, etc.)
   # insert an empty line
   machine learning framework modules (e.g. torch, torch.nn)
   # insert an empty line

diff --git a/docs/source/intro/installation.rst b/docs/source/intro/installation.rst
@@ -12,10 +12,10 @@ In this section, you will find the steps to install the library, troubleshoot kn
 
 **skrl** requires Python 3.6 or higher and the following libraries (they will be installed automatically):
 
-    * `gym <https://www.gymlibrary.dev>`_ / `gymnasium <https://gymnasium.farama.org/>`_
-    * `tqdm <https://tqdm.github.io>`_
+    * `gymnasium <https://gymnasium.farama.org/>`_
     * `packaging <https://packaging.pypa.io>`_
     * `tensorboard <https://www.tensorflow.org/tensorboard>`_
+    * `tqdm <https://tqdm.github.io>`_
 
 Machine learning (ML) framework
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -25,7 +25,7 @@ According to the specific ML frameworks, the following libraries are required:
 PyTorch
 """""""
 
-    * `torch <https://pytorch.org>`_ 1.9.0 or higher
+    * `torch <https://pytorch.org>`_ 1.10.0 or higher
 
 JAX
 """

diff --git a/docs/source/snippets/agent.py b/docs/source/snippets/agent.py
@@ -1,7 +1,7 @@
 # [start-agent-base-class-torch]
 from typing import Union, Tuple, Dict, Any, Optional
 
-import gym, gymnasium
+import gymnasium
 import copy
 
 import torch
@@ -33,8 +33,8 @@ class CUSTOM(Agent):
     def __init__(self,
                  models: Dict[str, Model],
                  memory: Optional[Union[Memory, Tuple[Memory]]] = None,
-                 observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
-                 action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
+                 observation_space: Optional[Union[int, Tuple[int], gymnasium.Space]] = None,
+                 action_space: Optional[Union[int, Tuple[int], gymnasium.Space]] = None,
                  device: Optional[Union[str, torch.device]] = None,
                  cfg: Optional[dict] = None) -> None:
         """Custom agent
@@ -46,9 +46,9 @@ def __init__(self,
                        for the rest only the environment transitions will be added
         :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None
         :param observation_space: Observation/state space or shape (default: None)
-        :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional
+        :type observation_space: int, tuple or list of integers, gymnasium.Space or None, optional
         :param action_space: Action space or shape (default: None)
-        :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional
+        :type action_space: int, tuple or list of integers, gymnasium.Space or None, optional
         :param device: Device on which a torch tensor is or will be allocated (default: ``None``).
                        If None, the device will be either ``"cuda:0"`` if available or ``"cpu"``
         :type device: str or torch.device, optional
@@ -179,7 +179,7 @@ def _update(self, timestep: int, timesteps: int) -> None:
 # [start-agent-base-class-jax]
 from typing import Union, Tuple, Dict, Any, Optional
 
-import gym, gymnasium
+import gymnasium
 import copy
 
 import jaxlib
@@ -213,8 +213,8 @@ class CUSTOM(Agent):
     def __init__(self,
                  models: Dict[str, Model],
                  memory: Optional[Union[Memory, Tuple[Memory]]] = None,
-                 observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
-                 action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
+                 observation_space: Optional[Union[int, Tuple[int], gymnasium.Space]] = None,
+                 action_space: Optional[Union[int, Tuple[int], gymnasium.Space]] = None,
                  device: Optional[Union[str, jaxlib.xla_extension.Device]] = None,
                  cfg: Optional[dict] = None) -> None:
         """Custom agent
@@ -226,9 +226,9 @@ def __init__(self,
                        for the rest only the environment transitions will be added
         :type memory: skrl.memory.jax.Memory, list of skrl.memory.jax.Memory or None
         :param observation_space: Observation/state space or shape (default: None)
-        :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional
+        :type observation_space: int, tuple or list of integers, gymnasium.Space or None, optional
         :param action_space: Action space or shape (default: None)
-        :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional
+        :type action_space: int, tuple or list of integers, gymnasium.Space or None, optional
         :param device: Device on which a jax array is or will be allocated (default: ``None``).
                        If None, the device will be either ``"cuda:0"`` if available or ``"cpu"``
         :type device: str or jaxlib.xla_extension.Device, optional

diff --git a/docs/source/snippets/model_mixin.py b/docs/source/snippets/model_mixin.py
@@ -1,7 +1,7 @@
 # [start-model-torch]
 from typing import Optional, Union, Mapping, Sequence, Tuple, Any
 
-import gym, gymnasium
+import gymnasium
 
 import torch
 
@@ -10,17 +10,17 @@
 
 class CustomModel(Model):
     def __init__(self,
-                 observation_space: Union[int, Sequence[int], gym.Space, gymnasium.Space],
-                 action_space: Union[int, Sequence[int], gym.Space, gymnasium.Space],
+                 observation_space: Union[int, Sequence[int], gymnasium.Space],
+                 action_space: Union[int, Sequence[int], gymnasium.Space],
                  device: Optional[Union[str, torch.device]] = None) -> None:
         """Custom model
 
         :param observation_space: Observation/state space or shape.
                                   The ``num_observations`` property will contain the size of that space
-        :type observation_space: int, sequence of int, gym.Space, gymnasium.Space
+        :type observation_space: int, sequence of int, gymnasium.Space
         :param action_space: Action space or shape.
                              The ``num_actions`` property will contain the size of that space
-        :type action_space: int, sequence of int, gym.Space, gymnasium.Space
+        :type action_space: int, sequence of int, gymnasium.Space
         :param device: Device on which a torch tensor is or will be allocated (default: ``None``).
                        If None, the device will be either ``"cuda:0"`` if available or ``"cpu"``
         :type device: str or torch.device, optional
@@ -58,7 +58,7 @@ def act(self,
 # [start-model-jax]
 from typing import Optional, Union, Mapping, Tuple, Any
 
-import gym, gymnasium
+import gymnasium
 
 import flax
 import jaxlib
@@ -69,19 +69,19 @@ def act(self,
 
 class CustomModel(Model):
     def __init__(self,
-                 observation_space: Union[int, Sequence[int], gym.Space, gymnasium.Space],
-                 action_space: Union[int, Sequence[int], gym.Space, gymnasium.Space],
+                 observation_space: Union[int, Sequence[int], gymnasium.Space],
+                 action_space: Union[int, Sequence[int], gymnasium.Space],
                  device: Optional[Union[str, jaxlib.xla_extension.Device]] = None,
                  parent: Optional[Any] = None,
                  name: Optional[str] = None) -> None:
         """Custom model
 
         :param observation_space: Observation/state space or shape.
                                   The ``num_observations`` property will contain the size of that space
-        :type observation_space: int, sequence of int, gym.Space, gymnasium.Space
+        :type observation_space: int, sequence of int, gymnasium.Space
         :param action_space: Action space or shape.
                              The ``num_actions`` property will contain the size of that space
-        :type action_space: int, sequence of int, gym.Space, gymnasium.Space
+        :type action_space: int, sequence of int, gymnasium.Space
         :param device: Device on which a jax array is or will be allocated (default: ``None``).
                        If None, the device will be either ``"cuda:0"`` if available or ``"cpu"``
         :type device: str or jaxlib.xla_extension.Device, optional

diff --git a/docs/source/snippets/multi_agent.py b/docs/source/snippets/multi_agent.py
@@ -1,7 +1,7 @@
 # [start-multi-agent-base-class-torch]
 from typing import Union, Dict, Any, Optional, Sequence, Mapping
 
-import gym, gymnasium
+import gymnasium
 import copy
 
 import torch
@@ -34,8 +34,8 @@ def __init__(self,
                  possible_agents: Sequence[str],
                  models: Dict[str, Model],
                  memories: Optional[Mapping[str, Memory]] = None,
-                 observation_spaces: Optional[Union[Mapping[str, int], Mapping[str, gym.Space], Mapping[str, gymnasium.Space]]] = None,
-                 action_spaces: Optional[Union[Mapping[str, int], Mapping[str, gym.Space], Mapping[str, gymnasium.Space]]] = None,
+                 observation_spaces: Optional[Union[Mapping[str, int], Mapping[str, gymnasium.Space]]] = None,
+                 action_spaces: Optional[Union[Mapping[str, int], Mapping[str, gymnasium.Space]]] = None,
                  device: Optional[Union[str, torch.device]] = None,
                  cfg: Optional[dict] = None) -> None:
         """Custom multi-agent
@@ -48,9 +48,9 @@ def __init__(self,
         :param memories: Memories to store the transitions.
         :type memories: dictionary of skrl.memory.torch.Memory, optional
         :param observation_spaces: Observation/state spaces or shapes (default: ``None``)
-        :type observation_spaces: dictionary of int, sequence of int, gym.Space or gymnasium.Space, optional
+        :type observation_spaces: dictionary of int, sequence of int or gymnasium.Space, optional
         :param action_spaces: Action spaces or shapes (default: ``None``)
-        :type action_spaces: dictionary of int, sequence of int, gym.Space or gymnasium.Space, optional
+        :type action_spaces: dictionary of int, sequence of int or gymnasium.Space, optional
         :param device: Device on which a torch tensor is or will be allocated (default: ``None``).
                        If None, the device will be either ``"cuda:0"`` if available or ``"cpu"``
         :type device: str or torch.device, optional
@@ -182,7 +182,7 @@ def _update(self, timestep: int, timesteps: int) -> None:
 # [start-multi-agent-base-class-jax]
 from typing import Union, Dict, Any, Optional, Sequence, Mapping
 
-import gym, gymnasium
+import gymnasium
 import copy
 
 import jaxlib
@@ -217,8 +217,8 @@ def __init__(self,
                  possible_agents: Sequence[str],
                  models: Dict[str, Model],
                  memories: Optional[Mapping[str, Memory]] = None,
-                 observation_spaces: Optional[Union[Mapping[str, int], Mapping[str, gym.Space], Mapping[str, gymnasium.Space]]] = None,
-                 action_spaces: Optional[Union[Mapping[str, int], Mapping[str, gym.Space], Mapping[str, gymnasium.Space]]] = None,
+                 observation_spaces: Optional[Union[Mapping[str, int], Mapping[str, gymnasium.Space]]] = None,
+                 action_spaces: Optional[Union[Mapping[str, int], Mapping[str, gymnasium.Space]]] = None,
                  device: Optional[Union[str, jaxlib.xla_extension.Device]] = None,
                  cfg: Optional[dict] = None) -> None:
         """Custom multi-agent
@@ -231,9 +231,9 @@ def __init__(self,
         :param memories: Memories to store the transitions.
         :type memories: dictionary of skrl.memory.jax.Memory, optional
         :param observation_spaces: Observation/state spaces or shapes (default: ``None``)
-        :type observation_spaces: dictionary of int, sequence of int, gym.Space or gymnasium.Space, optional
+        :type observation_spaces: dictionary of int, sequence of int or gymnasium.Space, optional
         :param action_spaces: Action spaces or shapes (default: ``None``)
-        :type action_spaces: dictionary of int, sequence of int, gym.Space or gymnasium.Space, optional
+        :type action_spaces: dictionary of int, sequence of int or gymnasium.Space, optional
         :param device: Device on which a jax array is or will be allocated (default: ``None``).
                        If None, the device will be either ``"cuda:0"`` if available or ``"cpu"``
         :type device: str or jaxlib.xla_extension.Device, optional

diff --git a/pyproject.toml b/pyproject.toml
@@ -22,15 +22,14 @@ classifiers = [
 ]
 # dependencies / optional-dependencies
 dependencies = [
-  "gym",
   "gymnasium",
-  "tqdm",
   "packaging",
   "tensorboard",
+  "tqdm",
 ]
 [project.optional-dependencies]
 torch = [
-  "torch>=1.9",
+  "torch>=1.10",
 ]
 jax = [
   "jax>=0.4.3",

diff --git a/skrl/agents/jax/a2c/a2c.py b/skrl/agents/jax/a2c/a2c.py
@@ -2,7 +2,6 @@
 
 import copy
 import functools
-import gym
 import gymnasium
 
 import jax
@@ -172,8 +171,8 @@ class A2C(Agent):
     def __init__(self,
                  models: Mapping[str, Model],
                  memory: Optional[Union[Memory, Tuple[Memory]]] = None,
-                 observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
-                 action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
+                 observation_space: Optional[Union[int, Tuple[int], gymnasium.Space]] = None,
+                 action_space: Optional[Union[int, Tuple[int], gymnasium.Space]] = None,
                  device: Optional[Union[str, jax.Device]] = None,
                  cfg: Optional[dict] = None) -> None:
         """Advantage Actor Critic (A2C)
@@ -187,9 +186,9 @@ def __init__(self,
                        for the rest only the environment transitions will be added
         :type memory: skrl.memory.jax.Memory, list of skrl.memory.jax.Memory or None
         :param observation_space: Observation/state space or shape (default: ``None``)
-        :type observation_space: int, tuple or list of int, gym.Space, gymnasium.Space or None, optional
+        :type observation_space: int, tuple or list of int, gymnasium.Space or None, optional
         :param action_space: Action space or shape (default: ``None``)
-        :type action_space: int, tuple or list of int, gym.Space, gymnasium.Space or None, optional
+        :type action_space: int, tuple or list of int, gymnasium.Space or None, optional
         :param device: Device on which a tensor/array is or will be allocated (default: ``None``).
                        If None, the device will be either ``"cuda"`` if available or ``"cpu"``
         :type device: str or jax.Device, optional

diff --git a/skrl/agents/jax/base.py b/skrl/agents/jax/base.py
@@ -5,7 +5,6 @@
 import datetime
 import os
 import pickle
-import gym
 import gymnasium
 
 import flax
@@ -21,8 +20,8 @@ class Agent:
     def __init__(self,
                  models: Mapping[str, Model],
                  memory: Optional[Union[Memory, Tuple[Memory]]] = None,
-                 observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
-                 action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
+                 observation_space: Optional[Union[int, Tuple[int], gymnasium.Space]] = None,
+                 action_space: Optional[Union[int, Tuple[int], gymnasium.Space]] = None,
                  device: Optional[Union[str, jax.Device]] = None,
                  cfg: Optional[dict] = None) -> None:
         """Base class that represent a RL agent
@@ -34,9 +33,9 @@ def __init__(self,
                        for the rest only the environment transitions will be added
         :type memory: skrl.memory.jax.Memory, list of skrl.memory.jax.Memory or None
         :param observation_space: Observation/state space or shape (default: ``None``)
-        :type observation_space: int, tuple or list of int, gym.Space, gymnasium.Space or None, optional
+        :type observation_space: int, tuple or list of int, gymnasium.Space or None, optional
         :param action_space: Action space or shape (default: ``None``)
-        :type action_space: int, tuple or list of int, gym.Space, gymnasium.Space or None, optional
+        :type action_space: int, tuple or list of int, gymnasium.Space or None, optional
         :param device: Device on which a tensor/array is or will be allocated (default: ``None``).
                        If None, the device will be either ``"cuda"`` if available or ``"cpu"``
         :type device: str or jax.Device, optional

diff --git a/skrl/agents/jax/cem/cem.py b/skrl/agents/jax/cem/cem.py
@@ -1,7 +1,6 @@
 from typing import Any, Mapping, Optional, Tuple, Union
 
 import copy
-import gym
 import gymnasium
 
 import jax
@@ -54,8 +53,8 @@ class CEM(Agent):
     def __init__(self,
                  models: Mapping[str, Model],
                  memory: Optional[Union[Memory, Tuple[Memory]]] = None,
-                 observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
-                 action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
+                 observation_space: Optional[Union[int, Tuple[int], gymnasium.Space]] = None,
+                 action_space: Optional[Union[int, Tuple[int], gymnasium.Space]] = None,
                  device: Optional[Union[str, jax.Device]] = None,
                  cfg: Optional[dict] = None) -> None:
         """Cross-Entropy Method (CEM)
@@ -69,9 +68,9 @@ def __init__(self,
                        for the rest only the environment transitions will be added
         :type memory: skrl.memory.jax.Memory, list of skrl.memory.jax.Memory or None
         :param observation_space: Observation/state space or shape (default: ``None``)
-        :type observation_space: int, tuple or list of int, gym.Space, gymnasium.Space or None, optional
+        :type observation_space: int, tuple or list of int, gymnasium.Space or None, optional
         :param action_space: Action space or shape (default: ``None``)
-        :type action_space: int, tuple or list of int, gym.Space, gymnasium.Space or None, optional
+        :type action_space: int, tuple or list of int, gymnasium.Space or None, optional
         :param device: Device on which a tensor/array is or will be allocated (default: ``None``).
                        If None, the device will be either ``"cuda"`` if available or ``"cpu"``
         :type device: str or jax.Device, optional