diff --git a/.gitignore b/.gitignore index 6473ced97..a98cef38e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,4 @@ build/ dist/ .idea/ results/ -examples/gym/results/ +examples/gymnasium/results/ diff --git a/.pfnci/run.sh b/.pfnci/run.sh index dc575fec6..37480af36 100644 --- a/.pfnci/run.sh +++ b/.pfnci/run.sh @@ -75,7 +75,7 @@ main() { # pytest does not run with attrs==19.2.0 (https://github.com/pytest-dev/pytest/issues/3280) # NOQA "${PYTHON}" -m pip install \ 'pytest==4.1.1' 'attrs==19.1.0' 'pytest-xdist==1.26.1' \ - 'gym[atari,classic_control]==0.19.0' 'optuna' 'zipp==1.0.0' 'pybullet==2.8.1' 'jupyterlab==2.1.5' 'traitlets==5.1.1' 'pyglet==1.5.27' + 'gymnasium[atari,classic_control]==0.19.0' 'optuna' 'zipp==1.0.0' 'pybullet==2.8.1' 'jupyterlab==2.1.5' 'traitlets==5.1.1' git config --global user.email "you@example.com" git config --global user.name "Your Name" diff --git a/README.md b/README.md index 1a88055c3..d2840f7e6 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Refer to [Installation](http://pfrl.readthedocs.io/en/latest/install.html) for m ## Getting started -You can try [PFRL Quickstart Guide](examples/quickstart/quickstart.ipynb) first, or check the [examples](examples) ready for Atari 2600 and Open AI Gym. +You can try [PFRL Quickstart Guide](examples/quickstart/quickstart.ipynb) first, or check the [examples](examples) ready for Atari 2600 and Farama Foundation's gymnasium. For more information, you can refer to [PFRL's documentation](http://pfrl.readthedocs.io/en/latest/index.html). @@ -64,9 +64,9 @@ Following algorithms have been implemented in PFRL: - [ACER (Actor-Critic with Experience Replay)](https://arxiv.org/abs/1611.01224) - examples: [[atari]](examples/atari/train_acer_ale.py) - [Categorical DQN](https://arxiv.org/abs/1707.06887) - - examples: [[atari]](examples/atari/train_categorical_dqn_ale.py) [[general gym]](examples/gym/train_categorical_dqn_gym.py) + - examples: [[atari]](examples/atari/train_categorical_dqn_ale.py) [[general gymnasium]](examples/gymnasium/train_categorical_dqn_gymnasium.py) - [DQN (Deep Q-Network)](https://storage.googleapis.com/deepmind-media/dqn/DQNNaturePaper.pdf) (including [Double DQN](https://arxiv.org/abs/1509.06461), [Persistent Advantage Learning (PAL)](https://arxiv.org/abs/1512.04860), Double PAL, [Dynamic Policy Programming (DPP)](http://www.jmlr.org/papers/volume13/azar12a/azar12a.pdf)) - - examples: [[atari reproduction]](examples/atari/reproduction/dqn) [[atari]](examples/atari/train_dqn_ale.py) [[atari (batched)]](examples/atari/train_dqn_batch_ale.py) [[flickering atari]](examples/atari/train_drqn_ale.py) [[general gym]](examples/gym/train_dqn_gym.py) + - examples: [[atari reproduction]](examples/atari/reproduction/dqn) [[atari]](examples/atari/train_dqn_ale.py) [[atari (batched)]](examples/atari/train_dqn_batch_ale.py) [[flickering atari]](examples/atari/train_drqn_ale.py) [[general gymnasium]](examples/gymnasium/train_dqn_gymnasium.py) - [DDPG (Deep Deterministic Policy Gradients)](https://arxiv.org/abs/1509.02971) (including [SVG(0)](https://arxiv.org/abs/1510.09142)) - examples: [[mujoco reproduction]](examples/mujoco/reproduction/ddpg) - [IQN (Implicit Quantile Networks)](https://arxiv.org/abs/1806.06923) @@ -76,7 +76,7 @@ Following algorithms have been implemented in PFRL: - [Rainbow](https://arxiv.org/abs/1710.02298) - examples: [[atari reproduction]](examples/atari/reproduction/rainbow) [[Slime volleyball]](examples/slimevolley/) - 
[REINFORCE](http://www-anw.cs.umass.edu/~barto/courses/cs687/williams92simple.pdf) - - examples: [[general gym]](examples/gym/train_reinforce_gym.py) + - examples: [[general gymnasium]](examples/gymnasium/train_reinforce_gymnasium.py) - [SAC (Soft Actor-Critic)](https://arxiv.org/abs/1812.05905) - examples: [[mujoco reproduction]](examples/mujoco/reproduction/soft_actor_critic) [[Atlas walk]](examples/atlas/) - [TRPO (Trust Region Policy Optimization)](https://arxiv.org/abs/1502.05477) with [GAE (Generalized Advantage Estimation)](https://arxiv.org/abs/1506.02438) @@ -92,14 +92,14 @@ Following useful techniques have been also implemented in PFRL: - [Dueling Network](https://arxiv.org/abs/1511.06581) - examples: [[Rainbow]](examples/atari/reproduction/rainbow) [[DQN/DoubleDQN/PAL]](examples/atari/train_dqn_ale.py) - [Normalized Advantage Function](https://arxiv.org/abs/1603.00748) - - examples: [[DQN]](examples/gym/train_dqn_gym.py) (for continuous-action envs only) + - examples: [[DQN]](examples/gymnasium/train_dqn_gymnasium.py) (for continuous-action envs only) - [Deep Recurrent Q-Network](https://arxiv.org/abs/1507.06527) - examples: [[DQN]](examples/atari/train_drqn_ale.py) ## Environments -Environments that support the subset of OpenAI Gym's interface (`reset` and `step` methods) can be used. +Environments that support the subset of Farama Foundation's gymnasium's interface (`reset` and `step` methods) can be used. ## Contributing diff --git a/examples/README.md b/examples/README.md index f8fc3c4b6..4b97fc16c 100644 --- a/examples/README.md +++ b/examples/README.md @@ -3,7 +3,7 @@ - `atari`: examples for general Atari games - `atari/reproduction`: examples with benchmark scores for reproducing published results on Atari - `atlas`: training an Atlas robot to walk -- `gym`: examples for OpenAI Gym environments +- `gymnasium`: examples for OpenAI gymnasium environments - `grasping`: examples for a Bullet-based robotic grasping environment - `mujoco/reproduction`: examples with benchmark scores for reproducing published results on MuJoCo tasks - `quickstart`: a quickstart guide of PFRL diff --git a/examples/atari/train_acer_ale.py b/examples/atari/train_acer_ale.py index 9cdfa5945..d95cb1cca 100644 --- a/examples/atari/train_acer_ale.py +++ b/examples/atari/train_acer_ale.py @@ -4,8 +4,8 @@ # Prevent numpy from using multiple threads os.environ["OMP_NUM_THREADS"] = "1" -import gym # NOQA:E402 -import gym.wrappers # NOQA:E402 +import gymnasium # NOQA:E402 +import gymnasium.wrappers # NOQA:E402 import numpy as np # NOQA:E402 from torch import nn # NOQA:E402 @@ -91,7 +91,7 @@ def main(): args.outdir = experiments.prepare_output_dir(args, args.outdir) print("Output files are saved in {}".format(args.outdir)) - n_actions = gym.make(args.env).action_space.n + n_actions = gymnasium.make(args.env).action_space.n input_to_hidden = nn.Sequential( nn.Conv2d(4, 16, 8, stride=4), diff --git a/examples/atari/train_drqn_ale.py b/examples/atari/train_drqn_ale.py index ccbefa699..a0425784d 100644 --- a/examples/atari/train_drqn_ale.py +++ b/examples/atari/train_drqn_ale.py @@ -11,8 +11,8 @@ """ import argparse -import gym -import gym.wrappers +import gymnasium +import gymnasium.wrappers import numpy as np import torch from torch import nn @@ -193,7 +193,7 @@ def make_env(test): # Randomize actions like epsilon-greedy in evaluation as well env = pfrl.wrappers.RandomizeAction(env, args.eval_epsilon) if args.monitor: - env = gym.wrappers.Monitor( + env = gymnasium.wrappers.Monitor( env, args.outdir, 
mode="evaluation" if test else "training" ) if args.render: diff --git a/examples/atari/train_ppo_ale.py b/examples/atari/train_ppo_ale.py index 80bac591f..dd48244fd 100644 --- a/examples/atari/train_ppo_ale.py +++ b/examples/atari/train_ppo_ale.py @@ -1,4 +1,4 @@ -"""An example of training PPO against OpenAI Gym Atari Envs. +"""An example of training PPO against OpenAI gymnasium Atari Envs. This script is an example of training a PPO agent on Atari envs. @@ -25,7 +25,7 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument( - "--env", type=str, default="BreakoutNoFrameskip-v4", help="Gym Env ID." + "--env", type=str, default="BreakoutNoFrameskip-v4", help="gymnasium Env ID." ) parser.add_argument( "--gpu", type=int, default=0, help="GPU device ID. Set to -1 to use CPUs only." diff --git a/examples/atlas/train_soft_actor_critic_atlas.py b/examples/atlas/train_soft_actor_critic_atlas.py index 1d35d6e82..76d147279 100644 --- a/examples/atlas/train_soft_actor_critic_atlas.py +++ b/examples/atlas/train_soft_actor_critic_atlas.py @@ -4,8 +4,8 @@ import logging import sys -import gym -import gym.wrappers +import gymnasium +import gymnasium.wrappers import numpy as np import torch from torch import distributions, nn @@ -17,16 +17,16 @@ def make_env(args, seed, test): if args.env.startswith("Roboschool"): - # Check gym version because roboschool does not work with gym>=0.15.6 + # Check gymnasium version because roboschool does not work with gymnasium>=0.15.6 from distutils.version import StrictVersion - gym_version = StrictVersion(gym.__version__) - if gym_version >= StrictVersion("0.15.6"): - raise RuntimeError("roboschool does not work with gym>=0.15.6") + gymnasium_version = StrictVersion(gymnasium.__version__) + if gymnasium_version >= StrictVersion("0.15.6"): + raise RuntimeError("roboschool does not work with gymnasium>=0.15.6") import roboschool # NOQA - env = gym.make(args.env) + env = gymnasium.make(args.env) # Unwrap TimiLimit wrapper - assert isinstance(env, gym.wrappers.TimeLimit) + assert isinstance(env, gymnasium.wrappers.TimeLimit) env = env.env # Use different random seeds for train and test envs env_seed = 2**32 - 1 - seed if test else seed @@ -59,7 +59,7 @@ def main(): "--env", type=str, default="RoboschoolAtlasForwardWalk-v1", - help="OpenAI Gym env to perform algorithm on.", + help="OpenAI gymnasium env to perform algorithm on.", ) parser.add_argument( "--num-envs", type=int, default=4, help="Number of envs run in parallel." 
diff --git a/examples/grasping/train_dqn_batch_grasping.py b/examples/grasping/train_dqn_batch_grasping.py index 0274a0530..e4fa96024 100644 --- a/examples/grasping/train_dqn_batch_grasping.py +++ b/examples/grasping/train_dqn_batch_grasping.py @@ -2,8 +2,8 @@ import functools import os -import gym -import gym.spaces +import gymnasium +import gymnasium.spaces import numpy as np import torch from torch import nn @@ -13,7 +13,7 @@ from pfrl.q_functions import DiscreteActionValueHead -class CastAction(gym.ActionWrapper): +class CastAction(gymnasium.ActionWrapper): """Cast actions to a given type.""" def __init__(self, env, type_): @@ -24,14 +24,14 @@ def action(self, action): return self.type_(action) -class TransposeObservation(gym.ObservationWrapper): +class TransposeObservation(gymnasium.ObservationWrapper): """Transpose observations.""" def __init__(self, env, axes): super().__init__(env) self._axes = axes - assert isinstance(env.observation_space, gym.spaces.Box) - self.observation_space = gym.spaces.Box( + assert isinstance(env.observation_space, gymnasium.spaces.Box) + self.observation_space = gymnasium.spaces.Box( low=env.observation_space.low.transpose(*self._axes), high=env.observation_space.high.transpose(*self._axes), dtype=env.observation_space.dtype, @@ -41,7 +41,7 @@ def observation(self, observation): return observation.transpose(*self._axes) -class ObserveElapsedSteps(gym.Wrapper): +class ObserveElapsedSteps(gymnasium.Wrapper): """Observe the number of elapsed steps in an episode. A new observation will be a tuple of an original observation and an integer @@ -52,10 +52,10 @@ def __init__(self, env, max_steps): super().__init__(env) self._max_steps = max_steps self._elapsed_steps = 0 - self.observation_space = gym.spaces.Tuple( + self.observation_space = gymnasium.spaces.Tuple( ( env.observation_space, - gym.spaces.Discrete(self._max_steps + 1), + gymnasium.spaces.Discrete(self._max_steps + 1), ) ) @@ -64,13 +64,13 @@ def reset(self): return self.env.reset(), self._elapsed_steps def step(self, action): - observation, reward, done, info = self.env.step(action) + observation, reward, terminated, truncated, info = self.env.step(action) self._elapsed_steps += 1 assert self._elapsed_steps <= self._max_steps - return (observation, self._elapsed_steps), reward, done, info + return (observation, self._elapsed_steps), reward, terminated, truncated, info -class RecordMovie(gym.Wrapper): +class RecordMovie(gymnasium.Wrapper): """Record MP4 videos using pybullet's logging API.""" def __init__(self, env, dirname): @@ -87,7 +87,7 @@ def reset(self): pybullet.STATE_LOGGING_VIDEO_MP4, os.path.join(self._dirname, "{}.mp4".format(self._episode_idx)), ) - return obs + return obs, {} class GraspingQFunction(nn.Module): @@ -243,7 +243,7 @@ def main(): max_episode_steps = 8 def make_env(idx, test): - from pybullet_envs.bullet.kuka_diverse_object_gym_env import ( # NOQA + from pybullet_envs.bullet.kuka_diverse_object_gymnasium_env import ( # NOQA KukaDiverseObjectEnv, ) @@ -263,7 +263,7 @@ def make_env(idx, test): # Disable file caching to keep memory usage small env._p.setPhysicsEngineParameter(enableFileCaching=False) assert env.observation_space is None - env.observation_space = gym.spaces.Box( + env.observation_space = gymnasium.spaces.Box( low=0, high=255, shape=(84, 84, 3), dtype=np.uint8 ) # (84, 84, 3) -> (3, 84, 84) diff --git a/examples/gym/README.md b/examples/gym/README.md deleted file mode 100644 index 0e46abf0d..000000000 --- a/examples/gym/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# 
Examples for OpenAI Gym environments - -- `train_categorical_dqn_gym.py`: CategoricalDQN for discrete action action spaces -- `train_dqn_gym.py`: DQN for both discrete action and continuous action spaces -- `train_reinforce_gym.py`: REINFORCE for both discrete action and continuous action spaces (only for episodic envs) - -## How to run - -``` -python train_categorical_dqn_gym.py [options] -python train_dqn_gym.py [options] -python train_reinforce_gym.py [options] -``` - -Specify `--help` or read code for options. diff --git a/examples/gymnasium/README.md b/examples/gymnasium/README.md new file mode 100644 index 000000000..b17585519 --- /dev/null +++ b/examples/gymnasium/README.md @@ -0,0 +1,15 @@ +# Examples for OpenAI gymnasium environments + +- `train_categorical_dqn_gymnasium.py`: CategoricalDQN for discrete action action spaces +- `train_dqn_gymnasium.py`: DQN for both discrete action and continuous action spaces +- `train_reinforce_gymnasium.py`: REINFORCE for both discrete action and continuous action spaces (only for episodic envs) + +## How to run + +``` +python train_categorical_dqn_gymnasium.py [options] +python train_dqn_gymnasium.py [options] +python train_reinforce_gymnasium.py [options] +``` + +Specify `--help` or read code for options. diff --git a/examples/gym/train_categorical_dqn_gym.py b/examples/gymnasium/train_categorical_dqn_gym.py similarity index 95% rename from examples/gym/train_categorical_dqn_gym.py rename to examples/gymnasium/train_categorical_dqn_gym.py index 7c7105189..ac07557c7 100644 --- a/examples/gym/train_categorical_dqn_gym.py +++ b/examples/gymnasium/train_categorical_dqn_gym.py @@ -1,16 +1,16 @@ -"""An example of training Categorical DQN against OpenAI Gym Envs. +"""An example of training Categorical DQN against OpenAI gymnasium Envs. This script is an example of training a CategoricalDQN agent against OpenAI -Gym envs. Only discrete spaces are supported. +gymnasium envs. Only discrete spaces are supported. To solve CartPole-v0, run: - python train_categorical_dqn_gym.py --env CartPole-v0 + python train_categorical_dqn_gymnasium.py --env CartPole-v0 """ import argparse import sys -import gym +import gymnasium import torch import pfrl @@ -66,7 +66,7 @@ def main(): print("Output files are saved in {}".format(args.outdir)) def make_env(test): - env = gym.make(args.env) + env = gymnasium.make(args.env) env_seed = 2**32 - 1 - args.seed if test else args.seed env.seed(env_seed) # Cast observations to float32 because our model uses float32 diff --git a/examples/gym/train_dqn_gym.py b/examples/gymnasium/train_dqn_gym.py similarity index 96% rename from examples/gym/train_dqn_gym.py rename to examples/gymnasium/train_dqn_gym.py index 7a310965f..b4a5c22a2 100644 --- a/examples/gym/train_dqn_gym.py +++ b/examples/gymnasium/train_dqn_gym.py @@ -1,24 +1,24 @@ -"""An example of training DQN against OpenAI Gym Envs. +"""An example of training DQN against OpenAI gymnasium Envs. -This script is an example of training a DQN agent against OpenAI Gym envs. +This script is an example of training a DQN agent against OpenAI gymnasium envs. Both discrete and continuous action spaces are supported. For continuous action spaces, A NAF (Normalized Advantage Function) is used to approximate Q-values. 
-To solve CartPole-v0, run: - python train_dqn_gym.py --env CartPole-v0 +To solve CartPole-v1, run: + python train_dqn_gymnasium.py --env CartPole-v1 -To solve Pendulum-v0, run: - python train_dqn_gym.py --env Pendulum-v0 +To solve Pendulum-v1, run: + python train_dqn_gymnasium.py --env Pendulum-v1 """ import argparse import os import sys -import gym +import gymnasium import numpy as np import torch.optim as optim -from gym import spaces +from gymnasium import spaces import pfrl from pfrl import experiments, explorers @@ -42,7 +42,7 @@ def main(): " If it does not exist, it will be created." ), ) - parser.add_argument("--env", type=str, default="Pendulum-v0") + parser.add_argument("--env", type=str, default="Pendulum-v1") parser.add_argument("--seed", type=int, default=0, help="Random seed [0, 2 ** 32)") parser.add_argument("--gpu", type=int, default=0) parser.add_argument("--final-exploration-steps", type=int, default=10**4) @@ -100,7 +100,7 @@ def clip_action_filter(a): return np.clip(a, action_space.low, action_space.high) def make_env(idx=0, test=False): - env = gym.make(args.env) + env = gymnasium.make(args.env) # Use different random seeds for train and test envs process_seed = int(process_seeds[idx]) env_seed = 2**32 - 1 - process_seed if test else process_seed diff --git a/examples/gym/train_reinforce_gym.py b/examples/gymnasium/train_reinforce_gym.py similarity index 93% rename from examples/gym/train_reinforce_gym.py rename to examples/gymnasium/train_reinforce_gym.py index f2c9eaa61..c82ed51e0 100644 --- a/examples/gym/train_reinforce_gym.py +++ b/examples/gymnasium/train_reinforce_gym.py @@ -1,18 +1,18 @@ -"""An example of training a REINFORCE agent against OpenAI Gym envs. +"""An example of training a REINFORCE agent against OpenAI gymnasium envs. -This script is an example of training a REINFORCE agent against OpenAI Gym +This script is an example of training a REINFORCE agent against OpenAI gymnasium envs. Both discrete and continuous action spaces are supported. To solve CartPole-v0, run: - python train_reinforce_gym.py + python train_reinforce_gymnasium.py To solve InvertedPendulum-v1, run: - python train_reinforce_gym.py --env InvertedPendulum-v1 + python train_reinforce_gymnasium.py --env InvertedPendulum-v1 """ import argparse -import gym -import gym.spaces +import gymnasium +import gymnasium.spaces import torch from torch import nn @@ -59,7 +59,7 @@ def main(): args.outdir = experiments.prepare_output_dir(args, args.outdir) def make_env(test): - env = gym.make(args.env) + env = gymnasium.make(args.env) # Use different random seeds for train and test envs env_seed = 2**32 - 1 - args.seed if test else args.seed env.seed(env_seed) @@ -83,7 +83,7 @@ def make_env(test): obs_size = obs_space.low.size hidden_size = 200 # Switch policy types accordingly to action space types - if isinstance(action_space, gym.spaces.Box): + if isinstance(action_space, gymnasium.spaces.Box): model = nn.Sequential( nn.Linear(obs_size, hidden_size), nn.LeakyReLU(0.2), diff --git a/examples/mujoco/reproduction/ddpg/README.md b/examples/mujoco/reproduction/ddpg/README.md index bdc824806..4821f7abc 100644 --- a/examples/mujoco/reproduction/ddpg/README.md +++ b/examples/mujoco/reproduction/ddpg/README.md @@ -1,6 +1,6 @@ # DDPG on MuJoCo benchmarks -This example trains a DDPG agent ([Continuous Control with Deep Reinforcement Learning](https://arxiv.org/abs/1509.02971)) on MuJoCo benchmarks from OpenAI Gym. 
+This example trains a DDPG agent ([Continuous Control with Deep Reinforcement Learning](https://arxiv.org/abs/1509.02971)) on MuJoCo benchmarks from OpenAI gymnasium. We follow the training and evaluation settings of [Addressing Function Approximation Error in Actor-Critic Methods](http://arxiv.org/abs/1802.09477), which provides thorough, highly tuned benchmark results. diff --git a/examples/mujoco/reproduction/ddpg/train_ddpg.py b/examples/mujoco/reproduction/ddpg/train_ddpg.py index 397d231a6..41932d354 100644 --- a/examples/mujoco/reproduction/ddpg/train_ddpg.py +++ b/examples/mujoco/reproduction/ddpg/train_ddpg.py @@ -1,4 +1,4 @@ -"""A training script of DDPG on OpenAI Gym Mujoco environments. +"""A training script of DDPG on OpenAI gymnasium Mujoco environments. This script follows the settings of http://arxiv.org/abs/1802.09477 as much as possible. @@ -8,8 +8,8 @@ import logging import sys -import gym -import gym.wrappers +import gymnasium +import gymnasium.wrappers import numpy as np import torch from torch import nn @@ -36,7 +36,7 @@ def main(): "--env", type=str, default="Hopper-v2", - help="OpenAI Gym MuJoCo env to perform algorithm on.", + help="OpenAI gymnasium MuJoCo env to perform algorithm on.", ) parser.add_argument("--seed", type=int, default=0, help="Random seed [0, 2 ** 32)") parser.add_argument( @@ -81,7 +81,7 @@ def main(): "--pretrained-type", type=str, default="best", choices=["best", "final"] ) parser.add_argument( - "--monitor", action="store_true", help="Wrap env with gym.wrappers.Monitor." + "--monitor", action="store_true", help="Wrap env with gymnasium.wrappers.Monitor." ) parser.add_argument( "--log-level", type=int, default=logging.INFO, help="Level of the root logger." @@ -97,9 +97,9 @@ def main(): utils.set_random_seed(args.seed) def make_env(test): - env = gym.make(args.env) + env = gymnasium.make(args.env) # Unwrap TimeLimit wrapper - assert isinstance(env, gym.wrappers.TimeLimit) + assert isinstance(env, gymnasium.wrappers.TimeLimit) env = env.env # Use different random seeds for train and test envs env_seed = 2**32 - 1 - args.seed if test else args.seed diff --git a/examples/mujoco/reproduction/ppo/README.md b/examples/mujoco/reproduction/ppo/README.md index 7170455c4..ad1129aaf 100644 --- a/examples/mujoco/reproduction/ppo/README.md +++ b/examples/mujoco/reproduction/ppo/README.md @@ -1,6 +1,6 @@ # PPO on MuJoCo benchmarks -This example trains a PPO agent ([Proximal Policy Optimization Algorithms](http://arxiv.org/abs/1707.06347)) on MuJoCo benchmarks from OpenAI Gym. +This example trains a PPO agent ([Proximal Policy Optimization Algorithms](http://arxiv.org/abs/1707.06347)) on MuJoCo benchmarks from OpenAI gymnasium. We follow the training and evaluation settings of [Deep Reinforcement Learning that Matters](https://arxiv.org/abs/1709.06560), which provides thorough, highly tuned benchmark results. @@ -37,7 +37,7 @@ To view the full list of options, either view the code or run the example with t ## Known differences - While the original paper initialized weights by normal distribution (https://github.com/Breakend/baselines/blob/50ffe01d254221db75cdb5c2ba0ab51a6da06b0a/baselines/ppo1/mlp_policy.py#L28), we use orthogonal initialization as the latest openai/baselines does (https://github.com/openai/baselines/blob/9b68103b737ac46bc201dfb3121cfa5df2127e53/baselines/a2c/utils.py#L61). 
-- We used version v2 of the environments whereas the original results were reported for version v1, however this doesn't seem to introduce significant differences: https://github.com/openai/gym/pull/834 +- We used version v2 of the environments whereas the original results were reported for version v1, however this doesn't seem to introduce significant differences: https://github.com/openai/gymnasium/pull/834 ## Results diff --git a/examples/mujoco/reproduction/ppo/train_ppo.py b/examples/mujoco/reproduction/ppo/train_ppo.py index a42d8f0af..991de8aec 100644 --- a/examples/mujoco/reproduction/ppo/train_ppo.py +++ b/examples/mujoco/reproduction/ppo/train_ppo.py @@ -1,4 +1,4 @@ -"""A training script of PPO on OpenAI Gym Mujoco environments. +"""A training script of PPO on OpenAI gymnasium Mujoco environments. This script follows the settings of https://arxiv.org/abs/1709.06560 as much as possible. @@ -6,8 +6,8 @@ import argparse import functools -import gym -import gym.spaces +import gymnasium +import gymnasium.spaces import numpy as np import torch from torch import nn @@ -28,7 +28,7 @@ def main(): "--env", type=str, default="Hopper-v2", - help="OpenAI Gym MuJoCo env to perform algorithm on.", + help="OpenAI gymnasium MuJoCo env to perform algorithm on.", ) parser.add_argument( "--num-envs", type=int, default=1, help="Number of envs run in parallel." @@ -75,7 +75,7 @@ def main(): "--log-level", type=int, default=logging.INFO, help="Level of the root logger." ) parser.add_argument( - "--monitor", action="store_true", help="Wrap env with gym.wrappers.Monitor." + "--monitor", action="store_true", help="Wrap env with gymnasium.wrappers.Monitor." ) parser.add_argument( "--log-interval", @@ -112,7 +112,7 @@ def main(): args.outdir = experiments.prepare_output_dir(args, args.outdir) def make_env(process_idx, test): - env = gym.make(args.env) + env = gymnasium.make(args.env) # Use different random seeds for train and test envs process_seed = int(process_seeds[process_idx]) env_seed = 2**32 - 1 - process_seed if test else process_seed @@ -134,14 +134,14 @@ def make_batch_env(test): ) # Only for getting timesteps, and obs-action spaces - sample_env = gym.make(args.env) + sample_env = gymnasium.make(args.env) timestep_limit = sample_env.spec.max_episode_steps obs_space = sample_env.observation_space action_space = sample_env.action_space print("Observation space:", obs_space) print("Action space:", action_space) - assert isinstance(action_space, gym.spaces.Box) + assert isinstance(action_space, gymnasium.spaces.Box) # Normalize observations based on their empirical mean and variance obs_normalizer = pfrl.nn.EmpiricalNormalization( diff --git a/examples/mujoco/reproduction/soft_actor_critic/README.md b/examples/mujoco/reproduction/soft_actor_critic/README.md index 319fdd0c0..da7dd4fde 100644 --- a/examples/mujoco/reproduction/soft_actor_critic/README.md +++ b/examples/mujoco/reproduction/soft_actor_critic/README.md @@ -1,6 +1,6 @@ # Soft Actor-Critic (SAC) on MuJoCo benchmarks -This example trains a SAC agent ([Soft Actor-Critic Algorithms and Applications](https://arxiv.org/abs/1812.05905)) on MuJoCo benchmarks from OpenAI Gym. +This example trains a SAC agent ([Soft Actor-Critic Algorithms and Applications](https://arxiv.org/abs/1812.05905)) on MuJoCo benchmarks from OpenAI gymnasium. 
## Requirements diff --git a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py index 851785682..577ca881a 100644 --- a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py +++ b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py @@ -1,4 +1,4 @@ -"""A training script of Soft Actor-Critic on OpenAI Gym Mujoco environments. +"""A training script of Soft Actor-Critic on OpenAI gymnasium Mujoco environments. This script follows the settings of https://arxiv.org/abs/1812.05905 as much as possible. @@ -9,8 +9,8 @@ import sys from distutils.version import LooseVersion -import gym -import gym.wrappers +import gymnasium +import gymnasium.wrappers import numpy as np import torch from torch import distributions, nn @@ -35,7 +35,7 @@ def main(): "--env", type=str, default="Hopper-v2", - help="OpenAI Gym MuJoCo env to perform algorithm on.", + help="OpenAI gymnasium MuJoCo env to perform algorithm on.", ) parser.add_argument( "--num-envs", type=int, default=1, help="Number of envs run in parallel." @@ -83,7 +83,7 @@ def main(): "--pretrained-type", type=str, default="best", choices=["best", "final"] ) parser.add_argument( - "--monitor", action="store_true", help="Wrap env with gym.wrappers.Monitor." + "--monitor", action="store_true", help="Wrap env with gymnasium.wrappers.Monitor." ) parser.add_argument( "--log-interval", @@ -117,9 +117,9 @@ def main(): assert process_seeds.max() < 2**32 def make_env(process_idx, test): - env = gym.make(args.env) + env = gymnasium.make(args.env) # Unwrap TimiLimit wrapper - assert isinstance(env, gym.wrappers.TimeLimit) + assert isinstance(env, gymnasium.wrappers.TimeLimit) env = env.env # Use different random seeds for train and test envs process_seed = int(process_seeds[process_idx]) @@ -130,7 +130,7 @@ def make_env(process_idx, test): # Normalize action space to [-1, 1]^n env = pfrl.wrappers.NormalizeActionSpace(env) if args.monitor: - env = gym.wrappers.Monitor(env, args.outdir) + env = gymnasium.wrappers.Monitor(env, args.outdir) if args.render: env = pfrl.wrappers.Render(env) return env diff --git a/examples/mujoco/reproduction/td3/README.md b/examples/mujoco/reproduction/td3/README.md index a9503b03c..81c2748d9 100644 --- a/examples/mujoco/reproduction/td3/README.md +++ b/examples/mujoco/reproduction/td3/README.md @@ -1,6 +1,6 @@ # TD3 on MuJoCo benchmarks -This example trains a TD3 agent ([Addressing Function Approximation Error in Actor-Critic Methods](http://arxiv.org/abs/1802.09477)) on MuJoCo benchmarks from OpenAI Gym. +This example trains a TD3 agent ([Addressing Function Approximation Error in Actor-Critic Methods](http://arxiv.org/abs/1802.09477)) on MuJoCo benchmarks from OpenAI gymnasium. ## Requirements @@ -55,7 +55,7 @@ Each evaluation reports average return over 10 episodes without exploration nois Maximum evaluation scores, averaged over 10 trials (+/- standard deviation), are reported for each environment. Reported scores are taken from the "TD3" column of Table 1 of [Addressing Function Approximation Error in Actor-Critic Methods](http://arxiv.org/abs/1802.09477). -Although the original paper used v1 versions of MuJoCo envs, we used v2 as v1 are not supported by recent versions of OpenAI Gym. +Although the original paper used v1 versions of MuJoCo envs, we used v2 as v1 are not supported by recent versions of OpenAI gymnasium. 
| Environment | PFRL Score | Reported Score | | ------------------------- |:---------------------:|:---------------------:| @@ -73,7 +73,7 @@ Although the original paper used v1 versions of MuJoCo envs, we used v2 as v1 ar Average return of last 10 evaluation scores, averaged over 10 trials, are reported for each environment. Reported scores are taken from the "TD3" row of Table 2 of [Addressing Function Approximation Error in Actor-Critic Methods](http://arxiv.org/abs/1802.09477). -Although the original paper used v1 versions of MuJoCo envs, we used v2 as v1 are not supported by recent versions of OpenAI Gym. +Although the original paper used v1 versions of MuJoCo envs, we used v2 as v1 are not supported by recent versions of OpenAI gymnasium. | Environment | PFRL Score | Reported Score | | ------------------------- |:------------:|:--------------:| diff --git a/examples/mujoco/reproduction/td3/train_td3.py b/examples/mujoco/reproduction/td3/train_td3.py index e9ad62259..021388051 100644 --- a/examples/mujoco/reproduction/td3/train_td3.py +++ b/examples/mujoco/reproduction/td3/train_td3.py @@ -1,4 +1,4 @@ -"""A training script of TD3 on OpenAI Gym Mujoco environments. +"""A training script of TD3 on OpenAI gymnasium Mujoco environments. This script follows the settings of http://arxiv.org/abs/1802.09477 as much as possible. @@ -8,8 +8,8 @@ import logging import sys -import gym -import gym.wrappers +import gymnasium +import gymnasium.wrappers import numpy as np import torch from torch import nn @@ -33,7 +33,7 @@ def main(): "--env", type=str, default="Hopper-v2", - help="OpenAI Gym MuJoCo env to perform algorithm on.", + help="OpenAI gymnasium MuJoCo env to perform algorithm on.", ) parser.add_argument("--seed", type=int, default=0, help="Random seed [0, 2 ** 32)") parser.add_argument( @@ -78,7 +78,7 @@ def main(): "--pretrained-type", type=str, default="best", choices=["best", "final"] ) parser.add_argument( - "--monitor", action="store_true", help="Wrap env with gym.wrappers.Monitor." + "--monitor", action="store_true", help="Wrap env with gymnasium.wrappers.Monitor." ) parser.add_argument( "--log-level", type=int, default=logging.INFO, help="Level of the root logger." @@ -94,9 +94,9 @@ def main(): utils.set_random_seed(args.seed) def make_env(test): - env = gym.make(args.env) + env = gymnasium.make(args.env) # Unwrap TimeLimit wrapper - assert isinstance(env, gym.wrappers.TimeLimit) + assert isinstance(env, gymnasium.wrappers.TimeLimit) env = env.env # Use different random seeds for train and test envs env_seed = 2**32 - 1 - args.seed if test else args.seed diff --git a/examples/mujoco/reproduction/trpo/README.md b/examples/mujoco/reproduction/trpo/README.md index 1841ee7e4..b2b176ece 100644 --- a/examples/mujoco/reproduction/trpo/README.md +++ b/examples/mujoco/reproduction/trpo/README.md @@ -1,6 +1,6 @@ # TRPO on MuJoCo benchmarks -This example trains a TRPO agent ([Trust Region Policy Optimization](https://arxiv.org/abs/1502.05477)) on MuJoCo benchmarks from OpenAI Gym. +This example trains a TRPO agent ([Trust Region Policy Optimization](https://arxiv.org/abs/1502.05477)) on MuJoCo benchmarks from OpenAI gymnasium. We follow the training and evaluation settings of [Deep Reinforcement Learning that Matters](https://arxiv.org/abs/1709.06560), which provides thorough, highly tuned benchmark results. 
@@ -37,7 +37,7 @@ To view the full list of options, either view the code or run the example with t ## Known differences -- We used version v2 of the environments whereas the original results were reported for version v1, however this doesn't seem to introduce significant differences: https://github.com/openai/gym/pull/834 +- We used version v2 of the environments whereas the original results were reported for version v1, however this doesn't seem to introduce significant differences: https://github.com/openai/gymnasium/pull/834 ## Results diff --git a/examples/mujoco/reproduction/trpo/train_trpo.py b/examples/mujoco/reproduction/trpo/train_trpo.py index 339a4955d..f11a0a331 100644 --- a/examples/mujoco/reproduction/trpo/train_trpo.py +++ b/examples/mujoco/reproduction/trpo/train_trpo.py @@ -1,4 +1,4 @@ -"""A training script of TRPO on OpenAI Gym Mujoco environments. +"""A training script of TRPO on OpenAI gymnasium Mujoco environments. This script follows the settings of https://arxiv.org/abs/1709.06560 as much as possible. @@ -6,9 +6,9 @@ import argparse import logging -import gym -import gym.spaces -import gym.wrappers +import gymnasium +import gymnasium.spaces +import gymnasium.wrappers import torch from torch import nn @@ -20,7 +20,7 @@ def main(): parser.add_argument( "--gpu", type=int, default=0, help="GPU device ID. Set to -1 to use CPUs only." ) - parser.add_argument("--env", type=str, default="Hopper-v2", help="Gym Env ID") + parser.add_argument("--env", type=str, default="Hopper-v2", help="gymnasium Env ID") parser.add_argument("--seed", type=int, default=0, help="Random seed [0, 2 ** 32)") parser.add_argument( "--outdir", @@ -81,7 +81,7 @@ def main(): "--monitor", action="store_true", help=( - "Monitor the env by gym.wrappers.Monitor." + "Monitor the env by gymnasium.wrappers.Monitor." " Videos and additional log will be saved." 
), ) @@ -95,14 +95,14 @@ def main(): args.outdir = pfrl.experiments.prepare_output_dir(args, args.outdir) def make_env(test): - env = gym.make(args.env) + env = gymnasium.make(args.env) # Use different random seeds for train and test envs env_seed = 2**32 - 1 - args.seed if test else args.seed env.seed(env_seed) # Cast observations to float32 because our model uses float32 env = pfrl.wrappers.CastObservationToFloat32(env) if args.monitor: - env = gym.wrappers.Monitor(env, args.outdir) + env = gymnasium.wrappers.Monitor(env, args.outdir) if args.render: env = pfrl.wrappers.Render(env) return env @@ -114,7 +114,7 @@ def make_env(test): print("Observation space:", obs_space) print("Action space:", action_space) - assert isinstance(obs_space, gym.spaces.Box) + assert isinstance(obs_space, gymnasium.spaces.Box) # Normalize observations based on their empirical mean and variance obs_normalizer = pfrl.nn.EmpiricalNormalization( diff --git a/examples/optuna/optuna_dqn_obs1d.py b/examples/optuna/optuna_dqn_obs1d.py index c21e70e8d..c1cd44011 100644 --- a/examples/optuna/optuna_dqn_obs1d.py +++ b/examples/optuna/optuna_dqn_obs1d.py @@ -14,7 +14,7 @@ import os import random -import gym +import gymnasium as gym import torch.optim as optim try: @@ -244,7 +244,7 @@ def main(): "--env", type=str, default="LunarLander-v2", - help="OpenAI Gym Environment ID.", + help="OpenAI gym Environment ID.", ) parser.add_argument( "--outdir", diff --git a/examples/quickstart/quickstart.ipynb b/examples/quickstart/quickstart.ipynb index b31d0fe2e..d139c0ef7 100644 --- a/examples/quickstart/quickstart.ipynb +++ b/examples/quickstart/quickstart.ipynb @@ -15,7 +15,7 @@ "\n", "If you have already installed PFRL, let's begin!\n", "\n", - "First, you need to import necessary modules. The module name of PFRL is `pfrl`. Let's import `torch`, `gym`, and `numpy` as well since they are used later." + "First, you need to import necessary modules. The module name of PFRL is `pfrl`. Let's import `torch`, `gymnasium`, and `numpy` as well since they are used later." ] }, { @@ -27,7 +27,7 @@ "import pfrl\n", "import torch\n", "import torch.nn\n", - "import gym\n", + "import gymnasium\n", "import numpy" ] }, @@ -35,13 +35,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "PFRL can be used for any problems if they are modeled as \"environments\". [OpenAI Gym](https://github.com/openai/gym) provides various kinds of benchmark environments and defines the common interface among them. PFRL uses a subset of the interface. Specifically, an environment must define its observation space and action space and have at least two methods: `reset` and `step`.\n", + "PFRL can be used for any problems if they are modeled as \"environments\". [OpenAI gymnasium](https://github.com/openai/gymnasium) provides various kinds of benchmark environments and defines the common interface among them. PFRL uses a subset of the interface. 
Specifically, an environment must define its observation space and action space and have at least two methods: `reset` and `step`.\n", "\n", "- `env.reset` will reset the environment to the initial state and return the initial observation.\n", - "- `env.step` will execute a given action, move to the next state and return four values:\n", + "- `env.step` will execute a given action, move to the next state and return five values:\n", " - a next observation\n", " - a scalar reward\n", " - a boolean value indicating whether the current state is terminal or not\n", + " - a boolean value indicating whether the episode has been truncated or not\n", " - additional information\n", "- `env.render` will render the current state. (optional)\n", "\n", @@ -73,7 +74,7 @@ } ], "source": [ - "env = gym.make('CartPole-v0')\n", + "env = gymnasium.make('CartPole-v0')\n", "print('observation space:', env.observation_space)\n", "print('action space:', env.action_space)\n", "\n", @@ -81,10 +82,11 @@ "print('initial observation:', obs)\n", "\n", "action = env.action_space.sample()\n", - "obs, r, done, info = env.step(action)\n", + "obs, r, terminated, truncated, info = env.step(action)\n", "print('next observation:', obs)\n", "print('reward:', r)\n", - "print('done:', done)\n", + "print('terminated:', terminated)\n", + "print('terminated:', truncated)\n", "print('info:', info)\n", "\n", "# Uncomment to open a GUI window rendering the current state of the environment\n", @@ -315,11 +317,11 @@ " # Uncomment to watch the behavior in a GUI window\n", " # env.render()\n", " action = agent.act(obs)\n", - " obs, reward, done, _ = env.step(action)\n", + " obs, reward, terminated, _, _ = env.step(action)\n", " R += reward\n", " t += 1\n", " reset = t == max_episode_len\n", - " agent.observe(obs, reward, done, reset)\n", + " agent.observe(obs, reward, terminated, reset)\n", " if done or reset:\n", " break\n", " if i % 10 == 0:\n", @@ -373,11 +375,11 @@ " # Uncomment to watch the behavior in a GUI window\n", " # env.render()\n", " action = agent.act(obs)\n", - " obs, r, done, _ = env.step(action)\n", + " obs, r, terminated, _, _ = env.step(action)\n", " R += r\n", " t += 1\n", " reset = t == 200\n", - " agent.observe(obs, r, done, reset)\n", + " agent.observe(obs, r, terminated, reset)\n", " if done or reset:\n", " break\n", " print('evaluation episode:', i, 'R:', R)" diff --git a/examples/slimevolley/README.md b/examples/slimevolley/README.md index a3a4eac8f..b70b73032 100644 --- a/examples/slimevolley/README.md +++ b/examples/slimevolley/README.md @@ -37,7 +37,7 @@ python examples/slimevolley/train_rainbow.py --demo --render --load > i) % 2 for i in range(self.orig_action_space.n)] @@ -129,10 +129,10 @@ def main(): def make_env(test): if "SlimeVolley" in args.env: - # You need to install slimevolleygym - import slimevolleygym # NOQA + # You need to install slimevolleygymnasium + import slimevolleygymnasium # NOQA - env = gym.make(args.env) + env = gymnasium.make(args.env) # Use different random seeds for train and test envs env_seed = test_seed if test else train_seed env.seed(int(env_seed)) @@ -142,7 +142,7 @@ def make_env(test): ) if args.render: env = pfrl.wrappers.Render(env) - if isinstance(env.action_space, gym.spaces.MultiBinary): + if isinstance(env.action_space, gymnasium.spaces.MultiBinary): env = MultiBinaryAsDiscreteAction(env) return env diff --git a/examples_tests/gym/test_categorical_dqn.sh b/examples_tests/gym/test_categorical_dqn.sh index db8c8505f..28181fb16 100644 --- 
a/examples_tests/gym/test_categorical_dqn.sh +++ b/examples_tests/gym/test_categorical_dqn.sh @@ -6,7 +6,7 @@ outdir=$(mktemp -d) gpu="$1" -# gym/categorical_dqn -python examples/gym/train_categorical_dqn_gym.py --steps 100 --replay-start-size 50 --outdir $outdir/gym/categorical_dqn --gpu $gpu -model=$(find $outdir/gym/categorical_dqn -name "*_finish") -python examples/gym/train_categorical_dqn_gym.py --demo --load $model --eval-n-runs 1 --outdir $outdir/temp --gpu $gpu +# gymnasium/categorical_dqn +python examples/gymnasium/train_categorical_dqn_gymnasium.py --steps 100 --replay-start-size 50 --outdir $outdir/gymnasium/categorical_dqn --gpu $gpu +model=$(find $outdir/gymnasium/categorical_dqn -name "*_finish") +python examples/gymnasium/train_categorical_dqn_gymnasium.py --demo --load $model --eval-n-runs 1 --outdir $outdir/temp --gpu $gpu diff --git a/examples_tests/gym/test_dqn.sh b/examples_tests/gym/test_dqn.sh index c4452538c..fca628ddf 100644 --- a/examples_tests/gym/test_dqn.sh +++ b/examples_tests/gym/test_dqn.sh @@ -6,7 +6,7 @@ outdir=$(mktemp -d) gpu="$1" -# gym/dqn -python examples/gym/train_dqn_gym.py --steps 100 --replay-start-size 50 --outdir $outdir/gym/dqn --gpu $gpu -model=$(find $outdir/gym/dqn -name "*_finish") -python examples/gym/train_dqn_gym.py --demo --load $model --eval-n-runs 1 --outdir $outdir/temp --gpu $gpu +# gymnasium/dqn +python examples/gymnasium/train_dqn_gymnasium.py --steps 100 --replay-start-size 50 --outdir $outdir/gymnasium/dqn --gpu $gpu +model=$(find $outdir/gymnasium/dqn -name "*_finish") +python examples/gymnasium/train_dqn_gymnasium.py --demo --load $model --eval-n-runs 1 --outdir $outdir/temp --gpu $gpu diff --git a/examples_tests/gym/test_reinforce.sh b/examples_tests/gym/test_reinforce.sh index 77a36bc89..f5a8d1e86 100644 --- a/examples_tests/gym/test_reinforce.sh +++ b/examples_tests/gym/test_reinforce.sh @@ -6,7 +6,7 @@ outdir=$(mktemp -d) gpu="$1" -# gym/reinforce -python examples/gym/train_reinforce_gym.py --steps 100 --batchsize 1 --outdir $outdir/gym/reinforce --gpu $gpu -model=$(find $outdir/gym/reinforce -name "*_finish") -python examples/gym/train_reinforce_gym.py --demo --load $model --eval-n-runs 1 --outdir $outdir/temp --gpu $gpu +# gymnasium/reinforce +python examples/gymnasium/train_reinforce_gymnasium.py --steps 100 --batchsize 1 --outdir $outdir/gymnasium/reinforce --gpu $gpu +model=$(find $outdir/gymnasium/reinforce -name "*_finish") +python examples/gymnasium/train_reinforce_gymnasium.py --demo --load $model --eval-n-runs 1 --outdir $outdir/temp --gpu $gpu diff --git a/examples_tests/slimevolley/test_rainbow.sh b/examples_tests/slimevolley/test_rainbow.sh index 605f19b08..e2c48c133 100644 --- a/examples_tests/slimevolley/test_rainbow.sh +++ b/examples_tests/slimevolley/test_rainbow.sh @@ -7,7 +7,7 @@ outdir=$(mktemp -d) gpu="$1" # slimevolley/rainbow -# Use CartPole-v0 to test without installing slimevolleygym +# Use CartPole-v0 to test without installing slimevolleygymnasium python examples/slimevolley/train_rainbow.py --gpu $gpu --steps 100 --outdir $outdir/slimevolley/rainbow --env CartPole-v0 model=$(find $outdir/slimevolley/rainbow -name "*_finish") python examples/slimevolley/train_rainbow.py --demo --load $model --eval-n-episodes 1 --outdir $outdir/temp --gpu $gpu --env CartPole-v0 diff --git a/pfrl/envs/abc.py b/pfrl/envs/abc.py index 29b7b8e29..53e5591fe 100644 --- a/pfrl/envs/abc.py +++ b/pfrl/envs/abc.py @@ -1,5 +1,6 @@ import numpy as np -from gym import spaces +import gymnasium as gym +from gymnasium import 
spaces from pfrl import env @@ -123,7 +124,7 @@ def reset(self): self._offset = np.random.randint(self.n_max_offset + 1) else: self._offset = 0 - return self.observe() + return self.observe(), {} def step(self, action): if isinstance(self.action_space, spaces.Box): diff --git a/pfrl/envs/multiprocess_vector_env.py b/pfrl/envs/multiprocess_vector_env.py index a993e1940..540552f17 100644 --- a/pfrl/envs/multiprocess_vector_env.py +++ b/pfrl/envs/multiprocess_vector_env.py @@ -16,11 +16,11 @@ def worker(remote, env_fn): while True: cmd, data = remote.recv() if cmd == "step": - ob, reward, done, info = env.step(data) - remote.send((ob, reward, done, info)) + ob, reward, terminated, truncated, info = env.step(data) + remote.send((ob, reward, terminated, truncated, info)) elif cmd == "reset": - ob = env.reset() - remote.send(ob) + ob, info = env.reset() + remote.send((ob, info)) elif cmd == "close": remote.close() break @@ -41,7 +41,7 @@ class MultiprocessVectorEnv(pfrl.env.VectorEnv): Args: env_fns (list of callable): List of callables, each of which - returns gym.Env that is run in its own subprocess. + returns gymnasium.Env that is run in its own subprocess. """ def __init__(self, env_fns): @@ -83,8 +83,8 @@ def step(self, actions): for remote, action in zip(self.remotes, actions): remote.send(("step", action)) results = [remote.recv() for remote in self.remotes] - self.last_obs, rews, dones, infos = zip(*results) - return self.last_obs, rews, dones, infos + self.last_obs, rews, terminateds, truncateds, infos = zip(*results) + return self.last_obs, rews, terminateds, truncateds, infos def reset(self, mask=None): self._assert_not_closed() @@ -94,12 +94,13 @@ def reset(self, mask=None): if not m: remote.send(("reset", None)) - obs = [ - remote.recv() if not m else o + results = [ + remote.recv() if not m else (o, {}) for m, remote, o in zip(mask, self.remotes, self.last_obs) ] + obs, info = zip(*results) self.last_obs = obs - return obs + return obs, info def close(self): self._assert_not_closed() diff --git a/pfrl/envs/serial_vector_env.py b/pfrl/envs/serial_vector_env.py index 73104adfe..7c2416fe6 100644 --- a/pfrl/envs/serial_vector_env.py +++ b/pfrl/envs/serial_vector_env.py @@ -10,7 +10,7 @@ class SerialVectorEnv(pfrl.env.VectorEnv): use MultiprocessVectorEnv if possible. Args: - env_fns (list of gym.Env): List of gym.Env. + env_fns (list of gymnasium.Env): List of gymnasium.Env. 
""" def __init__(self, envs): @@ -22,8 +22,8 @@ def __init__(self, envs): def step(self, actions): results = [env.step(a) for env, a in zip(self.envs, actions)] - self.last_obs, rews, dones, infos = zip(*results) - return self.last_obs, rews, dones, infos + self.last_obs, rews, terminations, truncations, infos = zip(*results) + return self.last_obs, rews, terminations, truncations, infos def reset(self, mask=None): if mask is None: @@ -33,7 +33,7 @@ def reset(self, mask=None): for m, env, o in zip(mask, self.envs, self.last_obs) ] self.last_obs = obs - return obs + return obs, {} def seed(self, seeds): for env, seed in zip(self.envs, seeds): diff --git a/pfrl/experiments/evaluator.py b/pfrl/experiments/evaluator.py index 75691784c..4b0afbede 100644 --- a/pfrl/experiments/evaluator.py +++ b/pfrl/experiments/evaluator.py @@ -8,7 +8,6 @@ import pfrl - def _run_episodes( env, agent, @@ -23,24 +22,24 @@ def _run_episodes( logger = logger or logging.getLogger(__name__) scores = [] lengths = [] - terminate = False + terminated = False timestep = 0 reset = True - while not terminate: + while not terminated: if reset: - obs = env.reset() - done = False + obs, info = env.reset() + terminated = False test_r = 0 episode_len = 0 info = {} a = agent.act(obs) - obs, r, done, info = env.step(a) + obs, r, terminated, truncated, info = env.step(a) test_r += r episode_len += 1 timestep += 1 - reset = done or episode_len == max_episode_len or info.get("needs_reset", False) - agent.observe(obs, r, done, reset) + reset = terminated or episode_len == max_episode_len or info.get("needs_reset", False) or truncated + agent.observe(obs, r, terminated, reset) if reset: logger.info( "evaluation episode %s length:%s R:%s", len(scores), episode_len, test_r @@ -50,9 +49,9 @@ def _run_episodes( scores.append(float(test_r)) lengths.append(float(episode_len)) if n_steps is None: - terminate = len(scores) >= n_episodes + terminated = len(scores) >= n_episodes else: - terminate = timestep >= n_steps + terminated = timestep >= n_steps # If all steps were used for a single unfinished episode if len(scores) == 0: scores.append(float(test_r)) @@ -120,7 +119,7 @@ def _batch_run_episodes( episode_r = np.zeros(num_envs, dtype=np.float64) episode_len = np.zeros(num_envs, dtype="i") - obss = env.reset() + obss, infos = env.reset() rs = np.zeros(num_envs, dtype="f") termination_conditions = False @@ -130,7 +129,7 @@ def _batch_run_episodes( actions = agent.batch_act(obss) timestep += 1 # o_{t+1}, r_{t+1} - obss, rs, dones, infos = env.step(actions) + obss, rs, terminations, truncations, infos = env.step(actions) episode_r += rs episode_len += 1 # Compute mask for done and reset @@ -139,11 +138,11 @@ def _batch_run_episodes( else: resets = episode_len == max_episode_len resets = np.logical_or( - resets, [info.get("needs_reset", False) for info in infos] + resets, [info.get("needs_reset", False) or truncated for truncated, info in zip(truncations, infos)] ) # Make mask. 0 if done/reset, 1 if pass - end = np.logical_or(resets, dones) + end = np.logical_or(resets, terminations) not_end = np.logical_not(end) for index in range(len(end)): @@ -199,12 +198,12 @@ def _batch_run_episodes( resets.fill(True) # Agent observes the consequences. 
- agent.batch_observe(obss, rs, dones, resets) + agent.batch_observe(obss, rs, terminations, resets) if termination_conditions: break else: - obss = env.reset(not_end) + obss, infos = env.reset(not_end) for i, (epi_len, epi_ret) in enumerate( zip(eval_episode_lens, eval_episode_returns) diff --git a/pfrl/experiments/train_agent.py b/pfrl/experiments/train_agent.py index c28e71b35..81321b9ac 100644 --- a/pfrl/experiments/train_agent.py +++ b/pfrl/experiments/train_agent.py @@ -41,7 +41,7 @@ def train_agent( episode_idx = 0 # o_0, r_0 - obs = env.reset() + obs , info = env.reset() t = step_offset if hasattr(agent, "t"): @@ -54,17 +54,17 @@ def train_agent( # a_t action = agent.act(obs) # o_{t+1}, r_{t+1} - obs, r, done, info = env.step(action) + obs, r, terminated, truncated, info = env.step(action) t += 1 episode_r += r episode_len += 1 - reset = episode_len == max_episode_len or info.get("needs_reset", False) - agent.observe(obs, r, done, reset) + reset = episode_len == max_episode_len or info.get("needs_reset", False) or truncated + agent.observe(obs, r, terminated, reset) for hook in step_hooks: hook(env, agent, t) - episode_end = done or reset or t == steps + episode_end = terminated or reset or t == steps if episode_end: logger.info( @@ -96,7 +96,7 @@ def train_agent( # Start a new episode episode_r = 0 episode_len = 0 - obs = env.reset() + obs, info = env.reset() if checkpoint_freq and t % checkpoint_freq == 0: save_agent(agent, t, outdir, logger, suffix="_checkpoint") diff --git a/pfrl/experiments/train_agent_async.py b/pfrl/experiments/train_agent_async.py index d8b3b4057..9e5971523 100644 --- a/pfrl/experiments/train_agent_async.py +++ b/pfrl/experiments/train_agent_async.py @@ -58,7 +58,7 @@ def save_model(): global_t = 0 local_t = 0 global_episodes = 0 - obs = env.reset() + obs, info = env.reset() episode_len = 0 successful = False @@ -66,12 +66,12 @@ def save_model(): # a_t a = agent.act(obs) # o_{t+1}, r_{t+1} - obs, r, done, info = env.step(a) + obs, r, terminated, truncated, info = env.step(a) local_t += 1 episode_r += r episode_len += 1 - reset = episode_len == max_episode_len or info.get("needs_reset", False) - agent.observe(obs, r, done, reset) + reset = episode_len == max_episode_len or info.get("needs_reset", False) or truncated + agent.observe(obs, r, terminated, reset) # Get and increment the global counter with counter.get_lock(): @@ -81,7 +81,7 @@ def save_model(): for hook in global_step_hooks: hook(env, agent, global_t) - if done or reset or global_t >= steps or stop_event.is_set(): + if terminated or reset or global_t >= steps or stop_event.is_set(): if process_idx == 0: logger.info( "outdir:%s global_step:%s local_step:%s R:%s", @@ -119,7 +119,7 @@ def save_model(): # Start a new episode episode_r = 0 episode_len = 0 - obs = env.reset() + obs, info = env.reset() if process_idx == 0 and exception_event.is_set(): logger.exception("An exception detected, exiting") diff --git a/pfrl/experiments/train_agent_batch.py b/pfrl/experiments/train_agent_batch.py index add7cda81..8826830ef 100644 --- a/pfrl/experiments/train_agent_batch.py +++ b/pfrl/experiments/train_agent_batch.py @@ -54,7 +54,7 @@ def train_agent_batch( episode_len = np.zeros(num_envs, dtype="i") # o_0, r_0 - obss = env.reset() + obss, infos = env.reset() t = step_offset if hasattr(agent, "t"): @@ -66,7 +66,7 @@ def train_agent_batch( # a_t actions = agent.batch_act(obss) # o_{t+1}, r_{t+1} - obss, rs, dones, infos = env.step(actions) + obss, rs, terminations, truncations, infos = env.step(actions) 
episode_r += rs episode_len += 1 @@ -76,13 +76,13 @@ def train_agent_batch( else: resets = episode_len == max_episode_len resets = np.logical_or( - resets, [info.get("needs_reset", False) for info in infos] + resets, [info.get("needs_reset", False) or truncation for truncation, info in zip(truncations, infos)] ) # Agent observes the consequences - agent.batch_observe(obss, rs, dones, resets) + agent.batch_observe(obss, rs, terminations, resets) - # Make mask. 0 if done/reset, 1 if pass - end = np.logical_or(resets, dones) + # Make mask. 0 if termination/reset, 1 if pass + end = np.logical_or(resets, terminations) not_end = np.logical_not(end) # For episodes that ends, do the following: @@ -138,7 +138,7 @@ def train_agent_batch( # Start new episodes if needed episode_r[end] = 0 episode_len[end] = 0 - obss = env.reset(not_end) + obss, infos = env.reset(not_end) except (Exception, KeyboardInterrupt): # Save the current model before being killed diff --git a/pfrl/utils/env_modifiers.py b/pfrl/utils/env_modifiers.py index a605b7b71..2c8b94259 100644 --- a/pfrl/utils/env_modifiers.py +++ b/pfrl/utils/env_modifiers.py @@ -24,11 +24,11 @@ def make_timestep_limited(env, timestep_limit): old_reset = env.reset def step(action): - observation, reward, done, info = old_step(action) + observation, reward, done, truncated, info = old_step(action) if t[0] >= timestep_limit: done = True t[0] += 1 - return observation, reward, done, info + return observation, reward, done, truncated, info def reset(): t[0] = 1 @@ -51,9 +51,9 @@ def make_reward_filtered(env, reward_filter): old_step = env.step def step(action): - observation, reward, done, info = old_step(action) + observation, reward, done, truncated, info = old_step(action) reward = reward_filter(reward) - return observation, reward, done, info + return observation, reward, done, truncated, info env.step = step @@ -73,10 +73,10 @@ def make_action_repeated(env, n_times): def step(action): r_total = 0 for _ in range(n_times): - obs, r, done, info = old_step(action) + obs, r, done, truncated, info = old_step(action) r_total += r if done: break - return obs, r_total, done, info + return obs, r_total, done, truncated, info env.step = step diff --git a/pfrl/utils/pretrained_models.py b/pfrl/utils/pretrained_models.py index 3c7e64d02..37e7bc5a0 100644 --- a/pfrl/utils/pretrained_models.py +++ b/pfrl/utils/pretrained_models.py @@ -162,7 +162,7 @@ def download_model(alg, env, model_type="best"): Args: alg (string): URL to download from. - env (string): Gym Environment name. + env (string): gymnasium Environment name. model_type (string): Either `best` or `final`. Returns: str: Path to the downloaded file. 
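The wrapper changes just below drop PFRL's own `ContinuingTimeLimit` and `Monitor` re-exports; a hedged sketch of the closest built-in Gymnasium stand-ins follows (the wrapper names are from Gymnasium's public API, while the env ID, episode limit, and video folder are only illustrative):

```python
import gymnasium
import gymnasium.wrappers

# render_mode="rgb_array" is needed so frames can be captured for video.
env = gymnasium.make("CartPole-v1", render_mode="rgb_array")

# Time limits: Gymnasium's TimeLimit signals expiry via truncated=True
# instead of setting info["needs_reset"] as ContinuingTimeLimit did.
env = gymnasium.wrappers.TimeLimit(env, max_episode_steps=200)

# Video recording: RecordVideo is the usual replacement for the old
# gym.wrappers.Monitor; here every episode is written to ./videos.
env = gymnasium.wrappers.RecordVideo(
    env, video_folder="videos", episode_trigger=lambda episode_id: True
)
```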
diff --git a/pfrl/wrappers/__init__.py b/pfrl/wrappers/__init__.py index 0f3e99258..ae26a4db5 100644 --- a/pfrl/wrappers/__init__.py +++ b/pfrl/wrappers/__init__.py @@ -1,7 +1,5 @@ from pfrl.wrappers.cast_observation import CastObservation # NOQA from pfrl.wrappers.cast_observation import CastObservationToFloat32 # NOQA -from pfrl.wrappers.continuing_time_limit import ContinuingTimeLimit # NOQA -from pfrl.wrappers.monitor import Monitor # NOQA from pfrl.wrappers.normalize_action_space import NormalizeActionSpace # NOQA from pfrl.wrappers.randomize_action import RandomizeAction # NOQA from pfrl.wrappers.render import Render # NOQA diff --git a/pfrl/wrappers/atari_wrappers.py b/pfrl/wrappers/atari_wrappers.py index 2a4977952..02a821f7e 100644 --- a/pfrl/wrappers/atari_wrappers.py +++ b/pfrl/wrappers/atari_wrappers.py @@ -4,13 +4,14 @@ from collections import deque -import gym +import gymnasium as gym import numpy as np -from gym import spaces +from gymnasium import spaces from packaging import version import pfrl + try: import cv2 @@ -45,10 +46,10 @@ def reset(self, **kwargs): assert noops > 0 obs = None for _ in range(noops): - obs, _, done, info = self.env.step(self.noop_action) - if done or info.get("needs_reset", False): - obs = self.env.reset(**kwargs) - return obs + obs, _, done, truncated, info = self.env.step(self.noop_action) + if done or info.get("needs_reset", False) or truncated: + obs, info = self.env.reset(**kwargs) + return obs, info def step(self, ac): return self.env.step(ac) @@ -63,13 +64,13 @@ def __init__(self, env): def reset(self, **kwargs): self.env.reset(**kwargs) - obs, _, done, info = self.env.step(1) - if done or info.get("needs_reset", False): + obs, _, done, truncated, info = self.env.step(1) + if done or info.get("needs_reset", False) or truncated: self.env.reset(**kwargs) - obs, _, done, info = self.env.step(2) - if done or info.get("needs_reset", False): + obs, _, done, truncated, info = self.env.step(2) + if done or info.get("needs_reset", False) or truncated: self.env.reset(**kwargs) - return obs + return obs, {} def step(self, ac): return self.env.step(ac) @@ -86,8 +87,8 @@ def __init__(self, env): self.needs_real_reset = True def step(self, action): - obs, reward, done, info = self.env.step(action) - self.needs_real_reset = done or info.get("needs_reset", False) + obs, reward, terminated, truncated, info = self.env.step(action) + self.needs_real_reset = terminated or info.get("needs_reset", False) or truncated # check current lives, make loss of life terminal, # then update lives to handle bonus lives lives = self.env.unwrapped.ale.lives() @@ -96,9 +97,9 @@ def step(self, action): # frames # so its important to keep lives > 0, so that we only reset once # the environment advertises done. - done = True + terminated = True self.lives = lives - return obs, reward, done, info + return obs, reward, terminated, truncated, info def reset(self, **kwargs): """Reset only when lives are exhausted. @@ -107,12 +108,12 @@ def reset(self, **kwargs): and the learner need not know about any of this behind-the-scenes.
""" if self.needs_real_reset: - obs = self.env.reset(**kwargs) + obs, info = self.env.reset(**kwargs) else: # no-op step to advance from terminal/lost life state - obs, _, _, _ = self.env.step(0) + obs, _, _, _, info = self.env.step(0) self.lives = self.env.unwrapped.ale.lives() - return obs + return obs, info class MaxAndSkipEnv(gym.Wrapper): @@ -128,19 +129,19 @@ def step(self, action): total_reward = 0.0 done = None for i in range(self._skip): - obs, reward, done, info = self.env.step(action) + obs, reward, done, truncated, info = self.env.step(action) if i == self._skip - 2: self._obs_buffer[0] = obs if i == self._skip - 1: self._obs_buffer[1] = obs total_reward += reward - if done or info.get("needs_reset", False): + if done or info.get("needs_reset", False) or truncated: break # Note that the observation on the done=True frame # doesn't matter max_frame = self._obs_buffer.max(axis=0) - return max_frame, total_reward, done, info + return max_frame, total_reward, done, truncated, info def reset(self, **kwargs): return self.env.reset(**kwargs) @@ -207,15 +208,15 @@ def __init__(self, env, k, channel_order="hwc"): ) def reset(self): - ob = self.env.reset() + ob, info = self.env.reset() for _ in range(self.k): self.frames.append(ob) - return self._get_ob() + return self._get_ob(), info def step(self, action): - ob, reward, done, info = self.env.step(action) + ob, reward, done, truncated, info = self.env.step(action) self.frames.append(ob) - return self._get_ob(), reward, done, info + return self._get_ob(), reward, done, truncated, info def _get_ob(self): assert len(self.frames) == self.k @@ -286,13 +287,11 @@ def observation(self, observation): def make_atari(env_id, max_frames=30 * 60 * 60): - env = gym.make(env_id) + env = gym.make(env_id, + repeat_action_probability=0.0, + full_action_space=False, frameskip=1, + max_num_frames_per_episode=max_frames) assert "NoFrameskip" in env.spec.id - assert isinstance(env, gym.wrappers.TimeLimit) - # Unwrap TimeLimit wrapper because we use our own time limits - env = env.env - if max_frames: - env = pfrl.wrappers.ContinuingTimeLimit(env, max_episode_steps=max_frames) env = NoopResetEnv(env, noop_max=30) env = MaxAndSkipEnv(env, skip=4) return env diff --git a/pfrl/wrappers/cast_observation.py b/pfrl/wrappers/cast_observation.py index 4519e6fd4..2fc853243 100644 --- a/pfrl/wrappers/cast_observation.py +++ b/pfrl/wrappers/cast_observation.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np diff --git a/pfrl/wrappers/continuing_time_limit.py b/pfrl/wrappers/continuing_time_limit.py deleted file mode 100644 index 04d7bec4f..000000000 --- a/pfrl/wrappers/continuing_time_limit.py +++ /dev/null @@ -1,41 +0,0 @@ -import gym - - -class ContinuingTimeLimit(gym.Wrapper): - """TimeLimit wrapper for continuing environments. - - This is similar gym.wrappers.TimeLimit, which sets a time limit for - each episode, except that done=False is returned and that - info['needs_reset'] is set to True when past the limit. - - Code that calls env.step is responsible for checking the info dict, the - fourth returned value, and resetting the env if it has the 'needs_reset' - key and its value is True. - - Args: - env (gym.Env): Env to wrap. - max_episode_steps (int): Maximum number of timesteps during an episode, - after which the env needs a reset. 
- """ - - def __init__(self, env, max_episode_steps): - super(ContinuingTimeLimit, self).__init__(env) - self._max_episode_steps = max_episode_steps - - self._elapsed_steps = None - - def step(self, action): - assert ( - self._elapsed_steps is not None - ), "Cannot call env.step() before calling reset()" - observation, reward, done, info = self.env.step(action) - self._elapsed_steps += 1 - - if self._max_episode_steps <= self._elapsed_steps: - info["needs_reset"] = True - - return observation, reward, done, info - - def reset(self): - self._elapsed_steps = 0 - return self.env.reset() diff --git a/pfrl/wrappers/gym_wrapper.py b/pfrl/wrappers/gym_wrapper.py new file mode 100644 index 000000000..728cb40c6 --- /dev/null +++ b/pfrl/wrappers/gym_wrapper.py @@ -0,0 +1,16 @@ +import gymnasium + + +class GymWrapper(gymnasium.Env): + def __init__(self, gym_env): + """A Gymnasium environment that wraps OpenAI gym environments.""" + super(GymWrapper, self).__init__() + self.env = gym_env + + def reset(self, **kwargs): + obs = self.env.reset() + return obs, {} + + def step(self, action): + obs, reward, done, info = self.env.step(action) + return obs, reward, done, False, info diff --git a/pfrl/wrappers/monitor.py b/pfrl/wrappers/monitor.py deleted file mode 100644 index 4e8e842da..000000000 --- a/pfrl/wrappers/monitor.py +++ /dev/null @@ -1,113 +0,0 @@ -import time -from logging import getLogger - -try: - from gym.wrappers import Monitor as _GymMonitor -except ImportError: - - class _Stub: - def __init__(self, *args, **kwargs): - raise RuntimeError("Monitor is not available in this version of gym") - - class _GymMonitor(_Stub): # type: ignore - pass - - class _GymStatsRecorder(_Stub): - pass - -else: - from gym.wrappers.monitoring.stats_recorder import StatsRecorder as _GymStatsRecorder # type: ignore # isort: skip # noqa: E501 - - -class Monitor(_GymMonitor): - """`Monitor` with PFRL's `ContinuingTimeLimit` support. - - `Agent` in PFRL might reset the env even when `done=False` - if `ContinuingTimeLimit` returns `info['needs_reset']=True`, - which is not expected for `gym.Monitor`. - - For details, see - https://github.com/openai/gym/blob/master/gym/wrappers/monitor.py - """ - - def _start( - self, - directory, - video_callable=None, - force=False, - resume=False, - write_upon_reset=False, - uid=None, - mode=None, - ): - if self.env_semantics_autoreset: - raise NotImplementedError( - "Detect 'semantics.autoreset=True' in `env.metadata`, " - "which means the env is from deprecated OpenAI Universe." - ) - ret = super()._start( - directory=directory, - video_callable=video_callable, - force=force, - resume=resume, - write_upon_reset=write_upon_reset, - uid=uid, - mode=mode, - ) - env_id = self.stats_recorder.env_id - self.stats_recorder = _StatsRecorder( - directory, - "{}.episode_batch.{}".format(self.file_prefix, self.file_infix), - autoreset=False, - env_id=env_id, - ) - if mode is not None: - self._set_mode(mode) - return ret - - -class _StatsRecorder(_GymStatsRecorder): - """`StatsRecorder` with PFRL's `ContinuingTimeLimit` support. 
- - For details, see - https://github.com/openai/gym/blob/master/gym/wrappers/monitoring/stats_recorder.py - """ - - def __init__( - self, - directory, - file_prefix, - autoreset=False, - env_id=None, - logger=getLogger(__name__), - ): - super().__init__(directory, file_prefix, autoreset=autoreset, env_id=env_id) - self._save_completed = True - self.logger = logger - - def before_reset(self): - assert not self.closed - - if self.done is not None and not self.done and self.steps > 0: - self.logger.debug( - "Tried to reset the env which is not done=True. " - "StatsRecorder completes the last episode." - ) - self.save_complete() - - self.done = False - if self.initial_reset_timestamp is None: - self.initial_reset_timestamp = time.time() - - def after_step(self, observation, reward, done, info): - self._save_completed = False - return super().after_step(observation, reward, done, info) - - def save_complete(self): - if not self._save_completed: - super().save_complete() - self._save_completed = True - - def close(self): - self.save_complete() - super().close() diff --git a/pfrl/wrappers/normalize_action_space.py b/pfrl/wrappers/normalize_action_space.py index dbf0ed24f..3e485c91f 100644 --- a/pfrl/wrappers/normalize_action_space.py +++ b/pfrl/wrappers/normalize_action_space.py @@ -1,15 +1,15 @@ -import gym -import gym.spaces +import gymnasium +import gymnasium.spaces import numpy as np -class NormalizeActionSpace(gym.ActionWrapper): +class NormalizeActionSpace(gymnasium.ActionWrapper): """Normalize a Box action space to [-1, 1]^n.""" def __init__(self, env): super().__init__(env) - assert isinstance(env.action_space, gym.spaces.Box) - self.action_space = gym.spaces.Box( + assert isinstance(env.action_space, gymnasium.spaces.Box) + self.action_space = gymnasium.spaces.Box( low=-np.ones_like(env.action_space.low), high=np.ones_like(env.action_space.low), ) diff --git a/pfrl/wrappers/randomize_action.py b/pfrl/wrappers/randomize_action.py index 9390f33bf..d9485aa8c 100644 --- a/pfrl/wrappers/randomize_action.py +++ b/pfrl/wrappers/randomize_action.py @@ -1,21 +1,21 @@ -import gym +import gymnasium import numpy as np -class RandomizeAction(gym.ActionWrapper): +class RandomizeAction(gymnasium.ActionWrapper): """Apply a random action instead of the one sent by the agent. This wrapper can be used to make a stochastic env. The common use is for evaluation in Atari environments, where actions are replaced with random ones with a low probability. - Only gym.spaces.Discrete is supported as an action space. + Only gymnasium.spaces.Discrete is supported as an action space. For exploration during training, use explorers like pfrl.explorers.ConstantEpsilonGreedy instead of this wrapper. Args: - env (gym.Env): Env to wrap. + env (gymnasium.Env): Env to wrap. random_fraction (float): Fraction of actions that will be replaced with a random action. It must be in [0, 1]. 
""" @@ -24,17 +24,19 @@ def __init__(self, env, random_fraction): super().__init__(env) assert 0 <= random_fraction <= 1 assert isinstance( - env.action_space, gym.spaces.Discrete - ), "RandomizeAction supports only gym.spaces.Discrete as an action space" + env.action_space, gymnasium.spaces.Discrete + ), "RandomizeAction supports only gymnasium.spaces.Discrete as an action space" self._random_fraction = random_fraction - self._np_random = np.random.RandomState() + self._rng = np.random.RandomState() def action(self, action): - if self._np_random.rand() < self._random_fraction: - return self._np_random.randint(self.env.action_space.n) + if self._rng.rand() < self._random_fraction: + return self._rng.randint(self.env.action_space.n) else: return action - def seed(self, seed): - super().seed(seed) - self._np_random.seed(seed) + def reset(self, **kwargs): + if 'seed' in kwargs: + self._rng = np.random.RandomState(kwargs['seed']) + return self.env.reset(**kwargs) + diff --git a/pfrl/wrappers/render.py b/pfrl/wrappers/render.py index 6dc0c0384..83dede7aa 100644 --- a/pfrl/wrappers/render.py +++ b/pfrl/wrappers/render.py @@ -1,11 +1,11 @@ -import gym +import gymnasium -class Render(gym.Wrapper): +class Render(gymnasium.Wrapper): """Render env by calling its render method. Args: - env (gym.Env): Env to wrap. + env (gymnasium.Env): Env to wrap. **kwargs: Keyword arguments passed to the render method. """ @@ -16,7 +16,7 @@ def __init__(self, env, **kwargs): def reset(self, **kwargs): ret = self.env.reset(**kwargs) self.env.render(**self._kwargs) - return ret + return ret, {} def step(self, action): ret = self.env.step(action) diff --git a/pfrl/wrappers/scale_reward.py b/pfrl/wrappers/scale_reward.py index 784616da5..d34a238d3 100644 --- a/pfrl/wrappers/scale_reward.py +++ b/pfrl/wrappers/scale_reward.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym class ScaleReward(gym.RewardWrapper): diff --git a/pfrl/wrappers/vector_frame_stack.py b/pfrl/wrappers/vector_frame_stack.py index 5596f5b87..1165b7e18 100644 --- a/pfrl/wrappers/vector_frame_stack.py +++ b/pfrl/wrappers/vector_frame_stack.py @@ -1,14 +1,14 @@ from collections import deque import numpy as np -from gym import spaces +from gymnasium import spaces from pfrl.env import VectorEnv from pfrl.wrappers.atari_wrappers import LazyFrames class VectorEnvWrapper(VectorEnv): - """VectorEnv analog to gym.Wrapper.""" + """VectorEnv analog to gymnasium.Wrapper.""" def __init__(self, env): self.env = env @@ -88,13 +88,13 @@ def reset(self, mask=None): if not m: for _ in range(self.k): frames.append(ob) - return self._get_ob() + return self._get_ob(), {} def step(self, action): - batch_ob, reward, done, info = self.env.step(action) + batch_ob, reward, terminated, _, info = self.env.step(action) for frames, ob in zip(self.frames, batch_ob): frames.append(ob) - return self._get_ob(), reward, done, info + return self._get_ob(), reward, terminated, info def _get_ob(self): assert len(self.frames) == self.env.num_envs diff --git a/requirements.txt b/requirements.txt index 45b6e8b0b..2ac56ecd7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ torch>=1.3.0 -gym>=0.9.7 +gymnasium>=0.9.7 numpy>=1.10.4 filelock pillow diff --git a/setup.cfg b/setup.cfg index 808dfd412..84f1c2234 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,7 +3,7 @@ [mypy-torch.*] ignore_missing_imports = True -[mypy-gym.*] +[mypy-gymnasium.*] ignore_missing_imports = True [mypy-numpy.*] diff --git a/setup.py b/setup.py index 47a5e4ef2..e037cf196 100644 --- a/setup.py +++ 
b/setup.py @@ -4,8 +4,8 @@ install_requires = [ 'torch>=1.3.0', - 'gym>=0.9.7', - 'numpy>=1.10.4', + 'gymnasium[atari]', + 'numpy>=1.11.0', 'pillow', 'filelock', ] diff --git a/tests/envs_tests/test_vector_envs.py b/tests/envs_tests/test_vector_envs.py index 768c09cc1..7a89e9984 100644 --- a/tests/envs_tests/test_vector_envs.py +++ b/tests/envs_tests/test_vector_envs.py @@ -1,4 +1,4 @@ -import gym +import gymnasium import numpy as np import pytest @@ -21,16 +21,16 @@ def setUp(self, num_envs, env_id, random_seed_offset, vector_env_to_test): # Init VectorEnv to test if self.vector_env_to_test == "SerialVectorEnv": self.vec_env = pfrl.envs.SerialVectorEnv( - [gym.make(self.env_id) for _ in range(self.num_envs)] + [gymnasium.make(self.env_id) for _ in range(self.num_envs)] ) elif self.vector_env_to_test == "MultiprocessVectorEnv": self.vec_env = pfrl.envs.MultiprocessVectorEnv( - [(lambda: gym.make(self.env_id)) for _ in range(self.num_envs)] + [(lambda: gymnasium.make(self.env_id)) for _ in range(self.num_envs)] ) else: assert False # Init envs to compare against - self.envs = [gym.make(self.env_id) for _ in range(self.num_envs)] + self.envs = [gymnasium.make(self.env_id) for _ in range(self.num_envs)] def teardown_method(self): # Delete so that all the subprocesses are joined @@ -59,14 +59,15 @@ def test_seed_reset_and_step(self): # step actions = [env.action_space.sample() for env in self.envs] - real_obss, real_rewards, real_dones, real_infos = zip( + real_obss, real_rewards, real_terminations, real_truncations, real_infos = zip( *[env.step(action) for env, action in zip(self.envs, actions)] ) - obss, rewards, dones, infos = self.vec_env.step(actions) + obss, rewards, terminations, truncations, infos = self.vec_env.step(actions) np.testing.assert_allclose(obss, real_obss) assert rewards == real_rewards - assert dones == real_dones + assert terminations == real_terminations assert infos == real_infos + assert truncations == real_truncations # reset with full mask should have no effect mask = np.ones(self.num_envs) diff --git a/tests/experiments_tests/test_evaluator.py b/tests/experiments_tests/test_evaluator.py index 6c0d96b23..2f6f82791 100644 --- a/tests/experiments_tests/test_evaluator.py +++ b/tests/experiments_tests/test_evaluator.py @@ -21,8 +21,8 @@ def test_evaluator_evaluate_if_necessary(save_best_so_far_agent, n_steps, n_epis agent.get_statistics.return_value = [] env = mock.Mock() - env.reset.return_value = "obs" - env.step.return_value = ("obs", 0, True, {}) + env.reset.return_value = "obs", {} + env.step.return_value = ("obs", 0, True, False, {}) env.get_statistics.return_value = [] evaluation_hook = mock.create_autospec( @@ -88,7 +88,7 @@ def test_evaluator_evaluate_if_necessary(save_best_so_far_agent, n_steps, n_epis assert agent.save.call_count == 0 # Third evaluation with a better score - env.step.return_value = ("obs", 1, True, {}) + env.step.return_value = ("obs", 1, True, False, {}) agent_evaluator.evaluate_if_necessary(t=9, episodes=9) assert agent.act.call_count == 3 * value assert agent.observe.call_count == 3 * value @@ -110,8 +110,8 @@ def test_async_evaluator_evaluate_if_necessary(save_best_so_far_agent, n_episode agent.get_statistics.return_value = [] env = mock.Mock() - env.reset.return_value = "obs" - env.step.return_value = ("obs", 0, True, {}) + env.reset.return_value = "obs", {} + env.step.return_value = ("obs", 0, True, False, {}) env.get_statistics.return_value = [] evaluation_hook = mock.create_autospec( @@ -158,7 +158,7 @@ def 
test_async_evaluator_evaluate_if_necessary(save_best_so_far_agent, n_episode assert agent.save.call_count == 0 # Third evaluation with a better score - env.step.return_value = ("obs", 1, True, {}) + env.step.return_value = ("obs", 1, True, False, {}) agent_evaluator.evaluate_if_necessary(t=9, episodes=9, env=env, agent=agent) assert agent.act.call_count == 3 * n_episodes assert agent.observe.call_count == 3 * n_episodes @@ -179,12 +179,12 @@ def test_run_evaluation_episodes_with_n_steps(n_episodes, n_steps): # Second episode: 4 -> 5 -> 6 -> 7 (done) env.reset.side_effect = [("state", 0), ("state", 4)] env.step.side_effect = [ - (("state", 1), 0.1, False, {}), - (("state", 2), 0.2, False, {}), - (("state", 3), 0.3, False, {"needs_reset": True}), - (("state", 5), -0.5, False, {}), - (("state", 6), 0, False, {}), - (("state", 7), 1, True, {}), + (("state", 1), 0.1, False, False, {}), + (("state", 2), 0.2, False, False, {}), + (("state", 3), 0.3, False, True, {"needs_reset": True}), + (("state", 5), -0.5, False, False, {}), + (("state", 6), 0, False, False, {}), + (("state", 7), 1, True, False, {}), ] if n_episodes: @@ -226,12 +226,12 @@ def test_needs_reset(self): # Second episode: 4 -> 5 -> 6 -> 7 (done) env.reset.side_effect = [("state", 0), ("state", 4)] env.step.side_effect = [ - (("state", 1), 0, False, {}), - (("state", 2), 0, False, {}), - (("state", 3), 0, False, {"needs_reset": True}), - (("state", 5), -0.5, False, {}), - (("state", 6), 0, False, {}), - (("state", 7), 1, True, {}), + (("state", 1), 0, False, False, {}), + (("state", 2), 0, False, False, {}), + (("state", 3), 0, False, True, {"needs_reset": True}), + (("state", 5), -0.5, False, False, {}), + (("state", 6), 0, False, False, {}), + (("state", 7), 1, True, False, {}), ] scores, lengths = evaluator.run_evaluation_episodes( env, agent, n_steps=None, n_episodes=2 @@ -261,11 +261,11 @@ def make_env(idx): # Second episode: 4 -> 5 -> 6 -> 7 (done) env.reset.side_effect = [("state", 0), ("state", 4)] env.step.side_effect = [ - (("state", 1), 0, False, {}), - (("state", 2), 0.1, False, {}), - (("state", 3), 0.2, False, {"needs_reset": True}), - (("state", 5), -0.5, False, {}), - (("state", 6), 0, False, {}), + (("state", 1), 0, False, False, {}), + (("state", 2), 0.1, False, False, {}), + (("state", 3), 0.2, False, False, {"needs_reset": True}), + (("state", 5), -0.5, False, False, {}), + (("state", 6), 0, False, False, {}), (("state", 7), 1, True, {}), ] else: @@ -274,11 +274,11 @@ def make_env(idx): # Third episode: 4 -> 5 -> 6 -> 7 (done) env.reset.side_effect = [("state", 0), ("state", 2), ("state", 4)] env.step.side_effect = [ - (("state", 1), 2, False, {"needs_reset": True}), - (("state", 3), 3, False, {"needs_reset": True}), - (("state", 5), -0.6, False, {}), - (("state", 6), 0, False, {}), - (("state", 7), 1, True, {}), + (("state", 1), 2, False, False, {"needs_reset": True}), + (("state", 3), 3, False, False, {"needs_reset": True}), + (("state", 5), -0.6, False, False, {}), + (("state", 6), 0, False, False, {}), + (("state", 7), 1, True, False, {}), ] return env @@ -326,12 +326,12 @@ def make_env(idx): # Second episode: 4 -> 5 -> 6 -> 7 (done) env.reset.side_effect = [("state", 0), ("state", 4)] env.step.side_effect = [ - (("state", 1), 0, False, {}), - (("state", 2), 0, False, {}), - (("state", 3), 0, False, {"needs_reset": True}), - (("state", 5), -0.5, False, {}), - (("state", 6), 0, False, {}), - (("state", 7), 1, True, {}), + (("state", 1), 0, False, False, {}), + (("state", 2), 0, False, False, {}), + (("state", 
3), 0, False, False, {"needs_reset": True}), + (("state", 5), -0.5, False, False, {}), + (("state", 6), 0, False, False, {}), + (("state", 7), 1, True, False, {}), ] else: # First episode: 0 -> 1 (reset) @@ -339,11 +339,11 @@ def make_env(idx): # Third episode: 4 -> 5 -> 6 -> 7 (done) env.reset.side_effect = [("state", 0), ("state", 2), ("state", 4)] env.step.side_effect = [ - (("state", 1), 2, False, {"needs_reset": True}), - (("state", 3), 3, False, {"needs_reset": True}), - (("state", 5), -0.6, False, {}), - (("state", 6), 0, False, {}), - (("state", 7), 1, True, {}), + (("state", 1), 2, False, False, {"needs_reset": True}), + (("state", 3), 3, False, False, {"needs_reset": True}), + (("state", 5), -0.6, False, False, {}), + (("state", 6), 0, False, False, {}), + (("state", 7), 1, True, False, {}), ] return env diff --git a/tests/experiments_tests/test_train_agent.py b/tests/experiments_tests/test_train_agent.py index a83315339..d60249f57 100644 --- a/tests/experiments_tests/test_train_agent.py +++ b/tests/experiments_tests/test_train_agent.py @@ -16,11 +16,11 @@ def test(self): # Reaches the terminal state after five actions env.reset.side_effect = [("state", 0)] env.step.side_effect = [ - (("state", 1), 0, False, {}), - (("state", 2), 0, False, {}), - (("state", 3), -0.5, False, {}), - (("state", 4), 0, False, {}), - (("state", 5), 1, True, {}), + (("state", 1), 0, False, False, {}), + (("state", 2), 0, False, False, {}), + (("state", 3), -0.5, False, False, {}), + (("state", 4), 0, False, False, {}), + (("state", 5), 1, True, False, {}), ] hook = mock.Mock() @@ -57,12 +57,12 @@ def test_needs_reset(self): # Second episode: 4 -> 5 -> 6 -> 7 (done) env.reset.side_effect = [("state", 0), ("state", 4)] env.step.side_effect = [ - (("state", 1), 0, False, {}), - (("state", 2), 0, False, {}), - (("state", 3), 0, False, {"needs_reset": True}), - (("state", 5), -0.5, False, {}), - (("state", 6), 0, False, {}), - (("state", 7), 1, True, {}), + (("state", 1), 0, False, False, {}), + (("state", 2), 0, False, False, {}), + (("state", 3), 0, False, False, {"needs_reset": True}), + (("state", 5), -0.5, False, False, {}), + (("state", 6), 0, False, False,{}), + (("state", 7), 1, True, False, {}), ] hook = mock.Mock() @@ -141,11 +141,11 @@ def test_eval_during_episode(eval_during_episode): # Two episodes env.reset.side_effect = [("state", 0)] * 2 env.step.side_effect = [ - (("state", 1), 0, False, {}), - (("state", 2), 0, False, {}), - (("state", 3), -0.5, True, {}), - (("state", 4), 0, False, {}), - (("state", 5), 1, True, {}), + (("state", 1), 0, False, False, {}), + (("state", 2), 0, False, False, {}), + (("state", 3), -0.5, True, False, {}), + (("state", 4), 0, False, False, {}), + (("state", 5), 1, True, False, {}), ] evaluator = mock.Mock() diff --git a/tests/experiments_tests/test_train_agent_async.py b/tests/experiments_tests/test_train_agent_async.py index 4d023269d..8437ffc01 100644 --- a/tests/experiments_tests/test_train_agent_async.py +++ b/tests/experiments_tests/test_train_agent_async.py @@ -26,16 +26,16 @@ def _make_env(process_idx, test): if max_episode_len is None: # Episodic env that terminates after 5 actions env.step.side_effect = [ - (("state", 1), 0, False, {}), - (("state", 2), 0, False, {}), - (("state", 3), -0.5, False, {}), - (("state", 4), 0, False, {}), - (("state", 5), 1, True, {}), + (("state", 1), 0, False, False, {}), + (("state", 2), 0, False, False, {}), + (("state", 3), -0.5, False, False, {}), + (("state", 4), 0, False, False, {}), + (("state", 5), 1, True, False, 
{}), ] * 1000 else: # Continuing env env.step.side_effect = [ - (("state", 1), 0, False, {}), + (("state", 1), 0, False,False, {}), ] * 1000 return env @@ -154,12 +154,12 @@ def test_needs_reset(self): # Second episode: 4 -> 5 -> 6 -> 7 (done) env.reset.side_effect = [("state", 0), ("state", 4)] env.step.side_effect = [ - (("state", 1), 0, False, {}), - (("state", 2), 0, False, {}), - (("state", 3), 0, False, {"needs_reset": True}), - (("state", 5), -0.5, False, {}), - (("state", 6), 0, False, {}), - (("state", 7), 1, True, {}), + (("state", 1), 0, False, False, {}), + (("state", 2), 0, False, False, {}), + (("state", 3), 0, False, False, {"needs_reset": True}), + (("state", 5), -0.5, False, False, {}), + (("state", 6), 0, False, False, {}), + (("state", 7), 1, True, False, {}), ] counter = mp.Value("i", 0) diff --git a/tests/experiments_tests/test_train_agent_batch.py b/tests/experiments_tests/test_train_agent_batch.py index 2c9d19c00..1e318c05b 100644 --- a/tests/experiments_tests/test_train_agent_batch.py +++ b/tests/experiments_tests/test_train_agent_batch.py @@ -24,16 +24,16 @@ def make_env(): if max_episode_len is None: # Episodic env that terminates after 5 actions env.step.side_effect = [ - (("state", 1), 0, False, {}), - (("state", 2), 0, False, {}), - (("state", 3), -0.5, False, {}), - (("state", 4), 0, False, {}), - (("state", 5), 1, True, {}), + (("state", 1), 0, False, False, {}), + (("state", 2), 0, False, False, {}), + (("state", 3), -0.5, False, False, {}), + (("state", 4), 0, False, False, {}), + (("state", 5), 1, True, False, {}), ] * 1000 else: # Continuing env env.step.side_effect = [ - (("state", 1), 0, False, {}), + (("state", 1), 0, False, False, {}), ] * 1000 return env @@ -193,12 +193,12 @@ def make_env(idx): # Second episode: 4 -> 5 -> 6 -> 7 (done) env.reset.side_effect = [("state", 0), ("state", 4)] env.step.side_effect = [ - (("state", 1), 0, False, {}), - (("state", 2), 0, False, {}), - (("state", 3), 0, False, {"needs_reset": True}), - (("state", 5), -0.5, False, {}), - (("state", 6), 0, False, {}), - (("state", 7), 1, True, {}), + (("state", 1), 0, False, False, {}), + (("state", 2), 0, False, False, {}), + (("state", 3), 0, False, False, {"needs_reset": True}), + (("state", 5), -0.5, False, False, {}), + (("state", 6), 0, False, False, {}), + (("state", 7), 1, True, False, {}), ] else: # First episode: 0 -> 1 (reset) @@ -206,11 +206,11 @@ def make_env(idx): # Third episode: 4 -> 5 -> 6 -> 7 (done) env.reset.side_effect = [("state", 0), ("state", 2), ("state", 4)] env.step.side_effect = [ - (("state", 1), 0, False, {"needs_reset": True}), - (("state", 3), 0, False, {"needs_reset": True}), - (("state", 5), -0.5, False, {}), - (("state", 6), 0, False, {}), - (("state", 7), 1, True, {}), + (("state", 1), 0, False, False, {"needs_reset": True}), + (("state", 3), 0, False, False, {"needs_reset": True}), + (("state", 5), -0.5, False, False, {}), + (("state", 6), 0, False, False, {}), + (("state", 7), 1, True, False, {}), ] return env diff --git a/tests/wrappers_tests/test_atari_wrappers.py b/tests/wrappers_tests/test_atari_wrappers.py index 04a21b573..f0be506e0 100644 --- a/tests/wrappers_tests/test_atari_wrappers.py +++ b/tests/wrappers_tests/test_atari_wrappers.py @@ -4,8 +4,8 @@ from unittest import mock -import gym -import gym.spaces +import gymnasium +import gymnasium.spaces import numpy as np import pytest @@ -45,8 +45,8 @@ def dtyped_rand(): ) for _ in range(steps) ] - env.action_space = gym.spaces.Discrete(2) - env.observation_space = gym.spaces.Box( + 
env.action_space = gymnasium.spaces.Discrete(2) + env.observation_space = gymnasium.spaces.Box( low=low, high=high, shape=(1, 84, 84), dtype=dtype ) return env @@ -73,8 +73,8 @@ def dtyped_rand(): for _ in range(steps - 1): action = env.action_space.sample() fs_action = fs_env.action_space.sample() - obs, r, done, info = env.step(action) - fs_obs, fs_r, fs_done, fs_info = fs_env.step(fs_action) + obs, r, done, _, info = env.step(action) + fs_obs, fs_r, fs_done, _, fs_info = fs_env.step(fs_action) assert isinstance(fs_obs, LazyFrames) np.testing.assert_allclose( obs.take(indices=0, axis=fs_env.stack_axis), @@ -116,8 +116,8 @@ def dtyped_rand(): ) for _ in range(steps) ] - env.action_space = gym.spaces.Discrete(2) - env.observation_space = gym.spaces.Box( + env.action_space = gymnasium.spaces.Discrete(2) + env.observation_space = gymnasium.spaces.Box( low=low, high=high, shape=(1, 84, 84), dtype=dtype ) return env @@ -140,8 +140,8 @@ def dtyped_rand(): for _ in range(steps - 1): action = env.action_space.sample() s_action = s_env.action_space.sample() - obs, r, done, info = env.step(action) - s_obs, s_r, s_done, s_info = s_env.step(s_action) + obs, r, terminated, _, info = env.step(action) + s_obs, s_r, s_terminated, _, s_info = s_env.step(s_action) np.testing.assert_allclose(np.array(obs) / s_env.scale, s_obs) assert r == s_r - assert done == s_done + assert terminated == s_terminated diff --git a/tests/wrappers_tests/test_cast_observation.py b/tests/wrappers_tests/test_cast_observation.py index f6fac6269..5f925fb39 100644 --- a/tests/wrappers_tests/test_cast_observation.py +++ b/tests/wrappers_tests/test_cast_observation.py @@ -1,4 +1,4 @@ -import gym +import gymnasium import numpy as np import pytest @@ -8,7 +8,7 @@ @pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v0"]) @pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64]) def test_cast_observation(env_id, dtype): - env = pfrl.wrappers.CastObservation(gym.make(env_id), dtype=dtype) + env = pfrl.wrappers.CastObservation(gymnasium.make(env_id), dtype=dtype) rtol = 1e-3 if dtype == np.float16 else 1e-7 obs = env.reset() @@ -16,7 +16,7 @@ def test_cast_observation(env_id, dtype): assert obs.dtype == dtype np.testing.assert_allclose(env.original_observation, obs, rtol=rtol) - obs, r, done, info = env.step(env.action_space.sample()) + obs, r, done, _, info = env.step(env.action_space.sample()) assert env.original_observation.dtype == np.float64 assert obs.dtype == dtype @@ -25,14 +25,14 @@ def test_cast_observation(env_id, dtype): @pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v0"]) def test_cast_observation_to_float32(env_id): - env = pfrl.wrappers.CastObservationToFloat32(gym.make(env_id)) + env = pfrl.wrappers.CastObservationToFloat32(gymnasium.make(env_id)) obs = env.reset() assert env.original_observation.dtype == np.float64 assert obs.dtype == np.float32 np.testing.assert_allclose(env.original_observation, obs) - obs, r, done, info = env.step(env.action_space.sample()) + obs, r, done, _, info = env.step(env.action_space.sample()) assert env.original_observation.dtype == np.float64 assert obs.dtype == np.float32 np.testing.assert_allclose(env.original_observation, obs) diff --git a/tests/wrappers_tests/test_continuing_time_limit.py b/tests/wrappers_tests/test_continuing_time_limit.py deleted file mode 100644 index 9a20d93c5..000000000 --- a/tests/wrappers_tests/test_continuing_time_limit.py +++ /dev/null @@ -1,31 +0,0 @@ -from unittest import mock - -import pytest - -import pfrl - - 
-@pytest.mark.parametrize("max_episode_steps", [1, 2, 3]) -def test_continuing_time_limit(max_episode_steps): - env = mock.Mock() - env.reset.side_effect = ["state"] * 2 - # Since info dicts are modified by the wapper, each step call needs to - # return a new info dict. - env.step.side_effect = [("state", 0, False, {}) for _ in range(6)] - env = pfrl.wrappers.ContinuingTimeLimit(env, max_episode_steps=max_episode_steps) - - env.reset() - for t in range(2): - _, _, done, info = env.step(0) - if t + 1 >= max_episode_steps: - assert info["needs_reset"] - else: - assert not info.get("needs_reset", False) - - env.reset() - for t in range(4): - _, _, done, info = env.step(0) - if t + 1 >= max_episode_steps: - assert info["needs_reset"] - else: - assert not info.get("needs_reset", False) diff --git a/tests/wrappers_tests/test_monitor.py b/tests/wrappers_tests/test_monitor.py deleted file mode 100644 index ba65e9cc9..000000000 --- a/tests/wrappers_tests/test_monitor.py +++ /dev/null @@ -1,59 +0,0 @@ -import os -import shutil -import tempfile - -import gym -import pytest -from gym.wrappers import TimeLimit - -import pfrl - - -@pytest.mark.parametrize("n_episodes", [1, 2, 3, 4]) -def test_monitor(n_episodes): - steps = 15 - - env = gym.make("CartPole-v1") - # unwrap default TimeLimit and wrap with new one to simulate done=True - # at step 5 - assert isinstance(env, TimeLimit) - env = env.env # unwrap - env = TimeLimit(env, max_episode_steps=5) # wrap - - tmpdir = tempfile.mkdtemp() - try: - env = pfrl.wrappers.Monitor( - env, directory=tmpdir, video_callable=lambda episode_id: True - ) - episode_idx = 0 - episode_len = 0 - t = 0 - _ = env.reset() - while True: - _, _, done, info = env.step(env.action_space.sample()) - episode_len += 1 - t += 1 - if episode_idx == 1 and episode_len >= 3: - info["needs_reset"] = True # simulate ContinuingTimeLimit - if done or info.get("needs_reset", False) or t == steps: - if episode_idx + 1 == n_episodes or t == steps: - break - env.reset() - episode_idx += 1 - episode_len = 0 - # `env.close()` is called when `env` is gabage-collected - # (or explicitly deleted/closed). 
- del env - # check if videos & meta files were generated - files = os.listdir(tmpdir) - mp4s = [f for f in files if f.endswith(".mp4")] - metas = [f for f in files if f.endswith(".meta.json")] - stats = [f for f in files if f.endswith(".stats.json")] - manifests = [f for f in files if f.endswith(".manifest.json")] - assert len(mp4s) == n_episodes - assert len(metas) == n_episodes - assert len(stats) == 1 - assert len(manifests) == 1 - - finally: - shutil.rmtree(tmpdir) diff --git a/tests/wrappers_tests/test_randomize_action.py b/tests/wrappers_tests/test_randomize_action.py index b9f7a6c13..d0c204a70 100644 --- a/tests/wrappers_tests/test_randomize_action.py +++ b/tests/wrappers_tests/test_randomize_action.py @@ -1,20 +1,21 @@ -import gym -import gym.spaces +import gymnasium +import gymnasium.spaces import numpy as np import pytest import pfrl -class ActionRecordingEnv(gym.Env): - observation_space = gym.spaces.Box(low=-1, high=1, shape=(1,)) - action_space = gym.spaces.Discrete(3) +class ActionRecordingEnv(gymnasium.Env): + + observation_space = gymnasium.spaces.Box(low=-1, high=1, shape=(1,)) + action_space = gymnasium.spaces.Discrete(3) def __init__(self): self.past_actions = [] def reset(self): - return self.observation_space.sample() + return self.observation_space.sample(), {} def step(self, action): self.past_actions.append(action) diff --git a/tests/wrappers_tests/test_render.py b/tests/wrappers_tests/test_render.py index 64c347370..4e555b37a 100644 --- a/tests/wrappers_tests/test_render.py +++ b/tests/wrappers_tests/test_render.py @@ -21,39 +21,41 @@ def test_render(render_kwargs): ("state", 3), ] orig_env.step.side_effect = [ - (("state", 1), 0, False, {}), - (("state", 2), 1, True, {}), + (("state", 1), 0, False, False, {}), + (("state", 2), 1, True, False, {}), ] env = pfrl.wrappers.Render(orig_env, **render_kwargs) # Not called env.render yet assert orig_env.render.call_count == 0 - obs = env.reset() + obs, _ = env.reset() assert obs == ("state", 0) # Called once assert orig_env.render.call_count == 1 - obs, reward, done, info = env.step(0) + obs, reward, terminated, truncated, info = env.step(0) assert obs == ("state", 1) assert reward == 0 - assert not done + assert not terminated + assert not truncated assert info == {} # Called twice assert orig_env.render.call_count == 2 - obs, reward, done, info = env.step(0) + obs, reward, terminated, truncated, info = env.step(0) assert obs == ("state", 2) assert reward == 1 - assert done + assert terminated + assert not truncated assert info == {} # Called thrice assert orig_env.render.call_count == 3 - obs = env.reset() + obs, _ = env.reset() assert obs == ("state", 3) # Called four times diff --git a/tests/wrappers_tests/test_scale_reward.py b/tests/wrappers_tests/test_scale_reward.py index 027287461..4bb95f720 100644 --- a/tests/wrappers_tests/test_scale_reward.py +++ b/tests/wrappers_tests/test_scale_reward.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np import pytest diff --git a/tests/wrappers_tests/test_vector_frame_stack.py b/tests/wrappers_tests/test_vector_frame_stack.py index 1a236a1ff..e739c1ed8 100644 --- a/tests/wrappers_tests/test_vector_frame_stack.py +++ b/tests/wrappers_tests/test_vector_frame_stack.py @@ -2,8 +2,8 @@ import unittest from unittest import mock -import gym -import gym.spaces +import gymnasium +import gymnasium.spaces import numpy as np import pytest @@ -43,8 +43,8 @@ def make_env(idx): ) for _ in range(steps) ] - env.action_space = gym.spaces.Discrete(2) - 
env.observation_space = gym.spaces.Box( + env.action_space = gymnasium.spaces.Discrete(2) + env.observation_space = gymnasium.spaces.Box( low=0, high=255, shape=(1, 84, 84), dtype=np.uint8 ) return env @@ -72,8 +72,8 @@ def make_env(idx): assert fs_env.action_space == vfs_env.action_space assert fs_env.observation_space == vfs_env.observation_space - fs_obs = fs_env.reset() - vfs_obs = vfs_env.reset() + fs_obs, _ = fs_env.reset() + vfs_obs, _ = vfs_env.reset() # Same LazyFrames observations for env_idx in range(num_envs): @@ -84,8 +84,8 @@ def make_env(idx): ) batch_action = [0] * num_envs - fs_new_obs, fs_r, fs_done, _ = fs_env.step(batch_action) - vfs_new_obs, vfs_r, vfs_done, _ = vfs_env.step(batch_action) + fs_new_obs, fs_r, fs_done, _, _ = fs_env.step(batch_action) + vfs_new_obs, vfs_r, vfs_done, _, _ = vfs_env.step(batch_action) # Same LazyFrames observations, but those from fs_env are copies # while those from vfs_env are references. @@ -105,8 +105,8 @@ def make_env(idx): for _ in range(steps - 1): fs_env.reset(mask=np.logical_not(fs_done)) vfs_env.reset(mask=np.logical_not(vfs_done)) - fs_obs, fs_r, fs_done, _ = fs_env.step(batch_action) - vfs_obs, vfs_r, vfs_done, _ = vfs_env.step(batch_action) + fs_obs, fs_r, fs_terminated, _, _ = fs_env.step(batch_action) + vfs_obs, vfs_r, vfs_terminated, _, _ = vfs_env.step(batch_action) for env_idx in range(num_envs): assert isinstance(fs_new_obs[env_idx], LazyFrames) assert isinstance(vfs_new_obs[env_idx], LazyFrames) @@ -114,4 +114,4 @@ def make_env(idx): np.asarray(fs_new_obs[env_idx]), np.asarray(vfs_new_obs[env_idx]) ) np.testing.assert_allclose(fs_r, vfs_r) - np.testing.assert_allclose(fs_done, vfs_done) + np.testing.assert_allclose(fs_terminated, vfs_terminated)
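The new pfrl/wrappers/gym_wrapper.py above bridges environments that still speak the legacy four-tuple OpenAI Gym API into the Gymnasium-style API used everywhere else in this patch. A short usage sketch, assuming a legacy gym install and an illustrative env id; note that the wrapper does not copy action_space/observation_space, so they are read from the wrapped env here:

import gym  # legacy OpenAI Gym (< 0.26), assumed to be installed alongside gymnasium
from pfrl.wrappers.gym_wrapper import GymWrapper

legacy_env = gym.make("CartPole-v1")   # old API: reset() -> obs, step() -> 4-tuple
env = GymWrapper(legacy_env)

obs, info = env.reset()                        # Gymnasium-style (obs, info)
action = legacy_env.action_space.sample()      # spaces still live on the wrapped env
obs, reward, terminated, truncated, info = env.step(action)
assert truncated is False                      # the wrapper never reports truncation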