From e5a102803c8af2c4f46d4d55148b9f4fee0f4cb7 Mon Sep 17 00:00:00 2001
From: Antonin RAFFIN
Date: Mon, 18 Nov 2024 15:56:05 +0100
Subject: [PATCH 1/4] Drop python 3.8, add python 3.12 support

---
 .github/workflows/ci.yml       |  5 ++--
 CHANGELOG.md                   | 16 ++++++++++++
 docs/conf.py                   |  3 +--
 pyproject.toml                 |  4 +--
 requirements.txt               |  2 +-
 rl_zoo3/benchmark.py           |  3 +--
 rl_zoo3/callbacks.py           |  4 +--
 rl_zoo3/exp_manager.py         | 48 +++++++++++++++++-----------------
 rl_zoo3/hyperparams_opt.py     | 26 +++++++++---------
 rl_zoo3/push_to_hub.py         | 16 ++++++------
 rl_zoo3/utils.py               | 28 ++++++++++----------
 rl_zoo3/version.txt            |  2 +-
 rl_zoo3/wrappers.py            | 14 +++++-----
 scripts/create_cluster_jobs.py |  3 +--
 scripts/run_jobs.py            |  3 +--
 setup.py                       |  6 ++---
 16 files changed, 97 insertions(+), 86 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 528e7fa2c..0c2cc3773 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -19,7 +19,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
         include:
           # Default version
           - gymnasium-version: "1.0.0"
@@ -51,6 +51,7 @@ jobs:
       - name: Install specific version of gym
        run: |
          uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
+          uv pip install --system "numpy<2"
        # Only run for python 3.10, downgrade gym to 0.29.1
      - name: Lint with ruff
        run: |
@@ -65,8 +66,6 @@ jobs:
       - name: Type check
        run: |
          make type
-        # Do not run for python 3.8 (mypy internal error)
-        if: matrix.python-version != '3.8'
      - name: Test with pytest
        run: |
          make pytest
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 773e72b6a..acdf4ed5e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,19 @@
+## Release 2.5.0a0 (WIP)
+
+### Breaking Changes
+- Upgraded to Pytorch >= 2.3.0
+- Upgraded to SB3 >= 2.5.0
+
+### New Features
+- Added support for Numpy v2
+
+### Bug fixes
+
+### Documentation
+
+### Other
+
+
 ## Release 2.4.0 (2024-11-18)
 
 **New algorithm: CrossQ, Gymnasium v1.0 support, and better defaults for SAC/TQC on Swimmer-v4 env**
diff --git a/docs/conf.py b/docs/conf.py
index 113067604..2ea8f7557 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -14,7 +14,6 @@
 import datetime
 import os
 import sys
-from typing import Dict
 
 # We CANNOT enable 'sphinxcontrib.spelling' because ReadTheDocs.org does not support
 # PyEnchant.
@@ -151,7 +150,7 @@ def setup(app):
 
 # -- Options for LaTeX output ------------------------------------------------
 
-latex_elements: Dict[str, str] = {
+latex_elements: dict[str, str] = {
     # The paper size ('letterpaper' or 'a4paper').
     #
     # 'papersize': 'letterpaper',
diff --git a/pyproject.toml b/pyproject.toml
index b00654161..16072fc30 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,8 +1,8 @@
 [tool.ruff]
 # Same as Black.
 line-length = 127
-# Assume Python 3.8
-target-version = "py38"
+# Assume Python 3.9
+target-version = "py39"
 
 [tool.ruff.lint]
 # See https://beta.ruff.rs/docs/rules/
diff --git a/requirements.txt b/requirements.txt
index acacb450e..97804065d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 gym==0.26.2
-stable-baselines3[extra,tests,docs]>=2.4.0,<3.0
+stable-baselines3[extra,tests,docs]>=2.5.0a0,<3.0
 box2d-py==2.3.8
 pybullet_envs_gymnasium>=0.5.0
 # minigrid
diff --git a/rl_zoo3/benchmark.py b/rl_zoo3/benchmark.py
index 8c7d8a210..22858482a 100644
--- a/rl_zoo3/benchmark.py
+++ b/rl_zoo3/benchmark.py
@@ -3,7 +3,6 @@
 import os
 import shutil
 import subprocess
-from typing import Dict, List
 
 import numpy as np
 import pandas as pd
@@ -33,7 +32,7 @@
 trained_models.update(get_hf_trained_models())
 
 n_experiments = len(trained_models)
-results: Dict[str, List] = {
+results: dict[str, list] = {
     "algo": [],
     "env_id": [],
     "mean_reward": [],
diff --git a/rl_zoo3/callbacks.py b/rl_zoo3/callbacks.py
index 4bfdd2381..d260a91ca 100644
--- a/rl_zoo3/callbacks.py
+++ b/rl_zoo3/callbacks.py
@@ -4,7 +4,7 @@
 from copy import deepcopy
 from functools import wraps
 from threading import Thread
-from typing import Optional, Type, Union
+from typing import Optional, Union
 
 import optuna
 from sb3_contrib import TQC
@@ -119,7 +119,7 @@ def __init__(self, gradient_steps: int = 100, verbose: int = 0, sleep_time: floa
         self._model: Union[SAC, TQC]
         self.gradient_steps = gradient_steps
         self.process: Thread
-        self.model_class: Union[Type[SAC], Type[TQC]]
+        self.model_class: Union[type[SAC], type[TQC]]
         self.sleep_time = sleep_time
 
     def _init_callback(self) -> None:
diff --git a/rl_zoo3/exp_manager.py b/rl_zoo3/exp_manager.py
index b61786f72..321a0378a 100644
--- a/rl_zoo3/exp_manager.py
+++ b/rl_zoo3/exp_manager.py
@@ -7,7 +7,7 @@
 from collections import OrderedDict
 from pathlib import Path
 from pprint import pprint
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Optional, Union
 
 import gymnasium as gym
 import numpy as np
@@ -71,9 +71,9 @@ def __init__(
         eval_freq: int = 10000,
         n_eval_episodes: int = 5,
         save_freq: int = -1,
-        hyperparams: Optional[Dict[str, Any]] = None,
-        env_kwargs: Optional[Dict[str, Any]] = None,
-        eval_env_kwargs: Optional[Dict[str, Any]] = None,
+        hyperparams: Optional[dict[str, Any]] = None,
+        env_kwargs: Optional[dict[str, Any]] = None,
+        eval_env_kwargs: Optional[dict[str, Any]] = None,
         trained_agent: str = "",
         optimize_hyperparameters: bool = False,
         storage: Optional[str] = None,
@@ -112,10 +112,10 @@ def __init__(
         default_path = Path(__file__).parent.parent
 
         self.config = config or str(default_path / f"hyperparams/{self.algo}.yml")
-        self.env_kwargs: Dict[str, Any] = env_kwargs or {}
+        self.env_kwargs: dict[str, Any] = env_kwargs or {}
         self.n_timesteps = n_timesteps
         self.normalize = False
-        self.normalize_kwargs: Dict[str, Any] = {}
+        self.normalize_kwargs: dict[str, Any] = {}
         self.env_wrapper: Optional[Callable] = None
         self.frame_stack = None
         self.seed = seed
@@ -124,14 +124,14 @@ def __init__(
         self.vec_env_class = {"dummy": DummyVecEnv, "subproc": SubprocVecEnv}[vec_env_type]
         self.vec_env_wrapper: Optional[Callable] = None
 
-        self.vec_env_kwargs: Dict[str, Any] = {}
+        self.vec_env_kwargs: dict[str, Any] = {}
         # self.vec_env_kwargs = {} if vec_env_type == "dummy" else {"start_method": "fork"}
 
         # Callbacks
-        self.specified_callbacks: List = []
-        self.callbacks: List[BaseCallback] = []
+        self.specified_callbacks: list = []
+        self.callbacks: list[BaseCallback] = []
         # Use env-kwargs if eval_env_kwargs was not specified
-        self.eval_env_kwargs: Dict[str, Any] = eval_env_kwargs or self.env_kwargs
+        self.eval_env_kwargs: dict[str, Any] = eval_env_kwargs or self.env_kwargs
         self.save_freq = save_freq
         self.eval_freq = eval_freq
         self.n_eval_episodes = n_eval_episodes
@@ -139,8 +139,8 @@ def __init__(
         self.n_envs = 1  # it will be updated when reading hyperparams
         self.n_actions = 0  # For DDPG/TD3 action noise objects
-        self._hyperparams: Dict[str, Any] = {}
-        self.monitor_kwargs: Dict[str, Any] = {}
+        self._hyperparams: dict[str, Any] = {}
+        self.monitor_kwargs: dict[str, Any] = {}
 
         self.trained_agent = trained_agent
         self.continue_training = trained_agent.endswith(".zip") and os.path.isfile(trained_agent)
 
@@ -179,7 +179,7 @@ def __init__(
         )
         self.params_path = f"{self.save_path}/{self.env_name}"
 
-    def setup_experiment(self) -> Optional[Tuple[BaseAlgorithm, Dict[str, Any]]]:
+    def setup_experiment(self) -> Optional[tuple[BaseAlgorithm, dict[str, Any]]]:
         """
         Read hyperparameters, pre-process them (create schedules, wrappers, callbacks, action noise objects)
         create the environment and possibly the model.
@@ -223,7 +223,7 @@ def learn(self, model: BaseAlgorithm) -> None:
         """
         :param model: an initialized RL model
         """
-        kwargs: Dict[str, Any] = {}
+        kwargs: dict[str, Any] = {}
         if self.log_interval > -1:
             kwargs = {"log_interval": self.log_interval}
 
@@ -272,7 +272,7 @@ def save_trained_model(self, model: BaseAlgorithm) -> None:
             assert vec_normalize is not None
             vec_normalize.save(os.path.join(self.params_path, "vecnormalize.pkl"))
 
-    def _save_config(self, saved_hyperparams: Dict[str, Any]) -> None:
+    def _save_config(self, saved_hyperparams: dict[str, Any]) -> None:
         """
         Save unprocessed hyperparameters, this can be use later
         to reproduce an experiment.
@@ -290,7 +290,7 @@ def _save_config(self, saved_hyperparams: Dict[str, Any]) -> None:
 
         print(f"Log path: {self.save_path}")
 
-    def read_hyperparameters(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+    def read_hyperparameters(self) -> tuple[dict[str, Any], dict[str, Any]]:
         print(f"Loading hyperparameters from: {self.config}")
 
         if self.config.endswith(".yml") or self.config.endswith(".yaml"):
@@ -298,7 +298,7 @@ def read_hyperparameters(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
             with open(self.config) as f:
                 hyperparams_dict = yaml.safe_load(f)
         elif self.config.endswith(".py"):
-            global_variables: Dict = {}
+            global_variables: dict = {}
             # Load hyperparameters from python file
             exec(Path(self.config).read_text(), global_variables)
             hyperparams_dict = global_variables["hyperparams"]
@@ -327,7 +327,7 @@ def read_hyperparameters(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
         return hyperparams, saved_hyperparams
 
     @staticmethod
-    def _preprocess_schedules(hyperparams: Dict[str, Any]) -> Dict[str, Any]:
+    def _preprocess_schedules(hyperparams: dict[str, Any]) -> dict[str, Any]:
         # Create schedules
         for key in ["learning_rate", "clip_range", "clip_range_vf", "delta_std"]:
             if key not in hyperparams:
@@ -345,7 +345,7 @@ def _preprocess_schedules(hyperparams: Dict[str, Any]) -> Dict[str, Any]:
                 raise ValueError(f"Invalid value for {key}: {hyperparams[key]}")
         return hyperparams
 
-    def _preprocess_normalization(self, hyperparams: Dict[str, Any]) -> Dict[str, Any]:
+    def _preprocess_normalization(self, hyperparams: dict[str, Any]) -> dict[str, Any]:
         if "normalize" in hyperparams.keys():
             self.normalize = hyperparams["normalize"]
 
@@ -370,8 +370,8 @@ def _preprocess_normalization(self, hyperparams: Dict[str, Any]) -> Dict[str, An
         return hyperparams
 
     def _preprocess_hyperparams(  # noqa: C901
-        self, hyperparams: Dict[str, Any]
-    ) -> Tuple[Dict[str, Any], Optional[Callable], List[BaseCallback], Optional[Callable]]:
+        self, hyperparams: dict[str, Any]
+    ) -> tuple[dict[str, Any], Optional[Callable], list[BaseCallback], Optional[Callable]]:
         self.n_envs = hyperparams.get("n_envs", 1)
 
         if self.verbose > 0:
@@ -448,8 +448,8 @@ def _preprocess_hyperparams(  # noqa: C901
         return hyperparams, env_wrapper, callbacks, vec_env_wrapper
 
     def _preprocess_action_noise(
-        self, hyperparams: Dict[str, Any], saved_hyperparams: Dict[str, Any], env: VecEnv
-    ) -> Dict[str, Any]:
+        self, hyperparams: dict[str, Any], saved_hyperparams: dict[str, Any], env: VecEnv
+    ) -> dict[str, Any]:
         # Parse noise string
         # Note: only off-policy algorithms are supported
         if hyperparams.get("noise_type") is not None:
@@ -667,7 +667,7 @@ def make_env(**kwargs) -> gym.Env:
 
         return env
 
-    def _load_pretrained_agent(self, hyperparams: Dict[str, Any], env: VecEnv) -> BaseAlgorithm:
+    def _load_pretrained_agent(self, hyperparams: dict[str, Any], env: VecEnv) -> BaseAlgorithm:
         # Continue training
         print("Loading pretrained agent")
         # Policy should not be changed
diff --git a/rl_zoo3/hyperparams_opt.py b/rl_zoo3/hyperparams_opt.py
index 1ff6708a0..4b77db4a0 100644
--- a/rl_zoo3/hyperparams_opt.py
+++ b/rl_zoo3/hyperparams_opt.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict
+from typing import Any
 
 import numpy as np
 import optuna
@@ -8,7 +8,7 @@
 from rl_zoo3 import linear_schedule
 
 
-def sample_ppo_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_ppo_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for PPO hyperparams.
 
@@ -76,7 +76,7 @@ def sample_ppo_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
     }
 
 
-def sample_ppo_lstm_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_ppo_lstm_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for RecurrentPPO hyperparams.
     uses sample_ppo_params(), this function samples for the policy_kwargs
@@ -98,7 +98,7 @@ def sample_ppo_lstm_params(trial: optuna.Trial, n_actions: int, n_envs: int, add
     return hyperparams
 
 
-def sample_trpo_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_trpo_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for TRPO hyperparams.
 
@@ -165,7 +165,7 @@ def sample_trpo_params(trial: optuna.Trial, n_actions: int, n_envs: int, additio
     }
 
 
-def sample_a2c_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_a2c_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for A2C hyperparams.
 
@@ -229,7 +229,7 @@ def sample_a2c_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
     }
 
 
-def sample_sac_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_sac_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for SAC hyperparams.
 
@@ -290,7 +290,7 @@ def sample_sac_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
     return hyperparams
 
 
-def sample_td3_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_td3_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for TD3 hyperparams.
 
@@ -346,7 +346,7 @@ def sample_td3_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
     return hyperparams
 
 
-def sample_ddpg_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_ddpg_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for DDPG hyperparams.
 
@@ -400,7 +400,7 @@ def sample_ddpg_params(trial: optuna.Trial, n_actions: int, n_envs: int, additio
     return hyperparams
 
 
-def sample_dqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_dqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for DQN hyperparams.
 
@@ -444,7 +444,7 @@ def sample_dqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
     return hyperparams
 
 
-def sample_her_params(trial: optuna.Trial, hyperparams: Dict[str, Any], her_kwargs: Dict[str, Any]) -> Dict[str, Any]:
+def sample_her_params(trial: optuna.Trial, hyperparams: dict[str, Any], her_kwargs: dict[str, Any]) -> dict[str, Any]:
     """
     Sampler for HerReplayBuffer hyperparams.
 
@@ -461,7 +461,7 @@ def sample_her_params(trial: optuna.Trial, hyperparams: Dict[str, Any], her_kwar
     return hyperparams
 
 
-def sample_tqc_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_tqc_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for TQC hyperparams.
 
@@ -480,7 +480,7 @@ def sample_tqc_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
     return hyperparams
 
 
-def sample_qrdqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_qrdqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for QR-DQN hyperparams.
 
@@ -496,7 +496,7 @@ def sample_qrdqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, additi
     return hyperparams
 
 
-def sample_ars_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_ars_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for ARS hyperparams.
     :param trial:
diff --git a/rl_zoo3/push_to_hub.py b/rl_zoo3/push_to_hub.py
index ede7d9f77..499bcc366 100644
--- a/rl_zoo3/push_to_hub.py
+++ b/rl_zoo3/push_to_hub.py
@@ -6,7 +6,7 @@
 from copy import deepcopy
 from pathlib import Path
 from pprint import pformat
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Optional
 
 import torch as th
 import yaml
@@ -27,7 +27,7 @@
 msg = Printer()
 
 
-def save_model_card(repo_dir: Path, generated_model_card: str, metadata: Dict[str, Any]) -> None:
+def save_model_card(repo_dir: Path, generated_model_card: str, metadata: dict[str, Any]) -> None:
     """Saves a model card for the repository.
 
     :param repo_dir: repository directory
@@ -50,9 +50,9 @@ def generate_model_card(
     env_id: str,
     mean_reward: float,
     std_reward: float,
-    hyperparams: Dict[str, Any],
-    env_kwargs: Dict[str, Any],
-) -> Tuple[str, Dict[str, Any]]:
+    hyperparams: dict[str, Any],
+    env_kwargs: dict[str, Any],
+) -> tuple[str, dict[str, Any]]:
     """
     Generate the model card for the Hub
 
@@ -131,8 +131,8 @@ def package_to_hub(
     algo_name: str,
     algo_class_name: str,
     log_path: Path,
-    hyperparams: Dict[str, Any],
-    env_kwargs: Dict[str, Any],
+    hyperparams: dict[str, Any],
+    env_kwargs: dict[str, Any],
     env_name: EnvironmentName,
     eval_env: VecEnv,
     repo_id: ModelRepoId,
@@ -394,7 +394,7 @@ def package_to_hub(
 
     # Note: we assume that we push models using the same machine (same python version)
     # that trained them, if not, we would need to pass custom object as in enjoy.py
-    custom_objects: Dict[str, Any] = {}
+    custom_objects: dict[str, Any] = {}
     model = ALGOS[algo].load(model_path, env=eval_env, custom_objects=custom_objects, device=args.device, **kwargs)
 
     # Deterministic by default except for atari games
diff --git a/rl_zoo3/utils.py b/rl_zoo3/utils.py
index 4b270e280..30d557945 100644
--- a/rl_zoo3/utils.py
+++ b/rl_zoo3/utils.py
@@ -3,7 +3,7 @@
 import importlib
 import os
 from copy import deepcopy
-from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Callable, Optional, Union
 
 import gymnasium as gym
 import stable_baselines3 as sb3  # noqa: F401
@@ -23,7 +23,7 @@
 # For custom activation fn
 from torch import nn as nn
 
-ALGOS: Dict[str, Type[BaseAlgorithm]] = {
+ALGOS: dict[str, type[BaseAlgorithm]] = {
     "a2c": A2C,
     "ddpg": DDPG,
     "dqn": DQN,
@@ -45,7 +45,7 @@ def flatten_dict_observations(env: gym.Env) -> gym.Env:
     return gym.wrappers.FlattenObservation(env)
 
 
-def get_wrapper_class(hyperparams: Dict[str, Any], key: str = "env_wrapper") -> Optional[Callable[[gym.Env], gym.Env]]:
+def get_wrapper_class(hyperparams: dict[str, Any], key: str = "env_wrapper") -> Optional[Callable[[gym.Env], gym.Env]]:
     """
     Get one or more Gym environment wrapper class specified as a hyper parameter "env_wrapper".
 
@@ -118,7 +118,7 @@ def wrap_env(env: gym.Env) -> gym.Env:
     return None
 
 
-def get_class_by_name(name: str) -> Type:
+def get_class_by_name(name: str) -> type:
     """
     Imports and returns a class given the name, e.g. passing
     'stable_baselines3.common.callbacks.CheckpointCallback' returns the
@@ -138,7 +138,7 @@ def get_class_name(name: str) -> str:
     return getattr(module, get_class_name(name))
 
 
-def get_callback_list(hyperparams: Dict[str, Any]) -> List[BaseCallback]:
+def get_callback_list(hyperparams: dict[str, Any]) -> list[BaseCallback]:
     """
     Get one or more Callback class specified as a hyper-parameter "callback".
 
@@ -155,7 +155,7 @@ def get_callback_list(hyperparams: Dict[str, Any]) -> List[BaseCallback]:
     :return:
     """
 
-    callbacks: List[BaseCallback] = []
+    callbacks: list[BaseCallback] = []
 
     if "callback" in hyperparams.keys():
         callback_name = hyperparams.get("callback")
@@ -196,8 +196,8 @@ def create_test_env(
     seed: int = 0,
     log_dir: Optional[str] = None,
     should_render: bool = True,
-    hyperparams: Optional[Dict[str, Any]] = None,
-    env_kwargs: Optional[Dict[str, Any]] = None,
+    hyperparams: Optional[dict[str, Any]] = None,
+    env_kwargs: Optional[dict[str, Any]] = None,
 ) -> VecEnv:
     """
     Create environment for testing a trained agent
@@ -221,7 +221,7 @@ def create_test_env(
     if "env_wrapper" in hyperparams.keys():
         del hyperparams["env_wrapper"]
 
-    vec_env_kwargs: Dict[str, Any] = {}
+    vec_env_kwargs: dict[str, Any] = {}
     # Avoid potential shared memory issue
     vec_env_cls = SubprocVecEnv if n_envs > 1 else DummyVecEnv
 
@@ -299,7 +299,7 @@ def func(progress_remaining: float) -> float:
     return func
 
 
-def get_trained_models(log_folder: str) -> Dict[str, Tuple[str, str]]:
+def get_trained_models(log_folder: str) -> dict[str, tuple[str, str]]:
     """
     :param log_folder: Root log folder
     :return: Dict representing the trained agents
@@ -320,7 +320,7 @@ def get_trained_models(log_folder: str) -> Dict[str, Tuple[str, str]]:
     return trained_models
 
 
-def get_hf_trained_models(organization: str = "sb3", check_filename: bool = False) -> Dict[str, Tuple[str, str]]:
+def get_hf_trained_models(organization: str = "sb3", check_filename: bool = False) -> dict[str, tuple[str, str]]:
     """
     Get pretrained models, available on the Hugginface hub for a given organization.
 
@@ -382,7 +382,7 @@ def get_saved_hyperparams(
     stats_path: str,
     norm_reward: bool = False,
     test_mode: bool = False,
-) -> Tuple[Dict[str, Any], Optional[str]]:
+) -> tuple[dict[str, Any], Optional[str]]:
     """
     Retrieve saved hyperparameters given a path.
     Return empty dict and None if the path is not valid.
@@ -392,7 +392,7 @@ def get_saved_hyperparams(
     :param test_mode:
     :return:
     """
-    hyperparams: Dict[str, Any] = {}
+    hyperparams: dict[str, Any] = {}
     if not os.path.isdir(stats_path):
         return hyperparams, None
     else:
@@ -448,7 +448,7 @@ def get_model_path(
     load_best: bool = False,
     load_checkpoint: Optional[str] = None,
     load_last_checkpoint: bool = False,
-) -> Tuple[str, str, str]:
+) -> tuple[str, str, str]:
     if exp_id == 0:
         exp_id = get_latest_run_id(os.path.join(folder, algo), env_name)
         print(f"Loading latest experiment, id={exp_id}")
diff --git a/rl_zoo3/version.txt b/rl_zoo3/version.txt
index 197c4d5c2..b8feefb94 100644
--- a/rl_zoo3/version.txt
+++ b/rl_zoo3/version.txt
@@ -1 +1 @@
-2.4.0
+2.5.0a0
diff --git a/rl_zoo3/wrappers.py b/rl_zoo3/wrappers.py
index c9b7810ed..e820fc6b0 100644
--- a/rl_zoo3/wrappers.py
+++ b/rl_zoo3/wrappers.py
@@ -1,4 +1,4 @@
-from typing import Any, ClassVar, Dict, Optional, SupportsFloat, Tuple
+from typing import Any, ClassVar, Optional, SupportsFloat
 
 import gymnasium as gym
 import numpy as np
@@ -54,7 +54,7 @@ def __init__(self, env: gym.Env, noise_std: float = 0.1):
         super().__init__(env)
         self.noise_std = noise_std
 
-    def step(self, action: np.ndarray) -> Tuple[ObsType, SupportsFloat, bool, bool, Dict[str, Any]]:
+    def step(self, action: np.ndarray) -> tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]:
         assert isinstance(self.action_space, spaces.Box)
         noise = np.random.normal(np.zeros_like(action), np.ones_like(action) * self.noise_std)
         noisy_action = np.clip(action + noise, self.action_space.low, self.action_space.high)
@@ -165,7 +165,7 @@ def __init__(self, env: gym.Env, horizon: int = 2):
     def _create_obs_from_history(self) -> np.ndarray:
         return np.concatenate((self.obs_history, self.action_history))
 
-    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> Tuple[np.ndarray, Dict]:
+    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[np.ndarray, dict]:
         # Flush the history
         self.obs_history[...] = 0
         self.action_history[...] = 0
@@ -174,7 +174,7 @@ def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> T
         self.obs_history[..., -obs.shape[-1] :] = obs
         return self._create_obs_from_history(), info
 
-    def step(self, action) -> Tuple[np.ndarray, SupportsFloat, bool, bool, Dict]:
+    def step(self, action) -> tuple[np.ndarray, SupportsFloat, bool, bool, dict]:
         obs, reward, terminated, truncated, info = self.env.step(action)
         last_ax_size = obs.shape[-1]
 
@@ -230,7 +230,7 @@ def __init__(self, env: gym.Env, horizon: int = 2):
     def _create_obs_from_history(self) -> np.ndarray:
         return np.concatenate((self.obs_history, self.action_history))
 
-    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> Tuple[Dict[str, np.ndarray], Dict]:
+    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[dict[str, np.ndarray], dict]:
         # Flush the history
         self.obs_history[...] = 0
         self.action_history[...] = 0
@@ -243,7 +243,7 @@ def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> T
 
         return obs_dict, info
 
-    def step(self, action) -> Tuple[Dict[str, np.ndarray], SupportsFloat, bool, bool, Dict]:
+    def step(self, action) -> tuple[dict[str, np.ndarray], SupportsFloat, bool, bool, dict]:
         obs_dict, reward, terminated, truncated, info = self.env.step(action)
         obs = obs_dict["observation"]
         last_ax_size = obs.shape[-1]
@@ -299,7 +299,7 @@ class MaskVelocityWrapper(gym.ObservationWrapper):
     """
 
     # Supported envs
-    velocity_indices: ClassVar[Dict[str, np.ndarray]] = {
+    velocity_indices: ClassVar[dict[str, np.ndarray]] = {
         "CartPole-v1": np.array([1, 3]),
         "MountainCar-v0": np.array([1]),
        "MountainCarContinuous-v0": np.array([1]),
diff --git a/scripts/create_cluster_jobs.py b/scripts/create_cluster_jobs.py
index ed84627c9..b7b523fe1 100644
--- a/scripts/create_cluster_jobs.py
+++ b/scripts/create_cluster_jobs.py
@@ -5,7 +5,6 @@
 import os
 import subprocess
 import time
-from typing import List
 
 import numpy as np
 
@@ -35,7 +34,7 @@
         log_folder,
         "-uuid",
     ]
-    arg_str_list: List[str] = list(map(str, args))
+    arg_str_list: list[str] = list(map(str, args))
 
     command = " ".join(["python", "-u", "train.py", *arg_str_list])
 
diff --git a/scripts/run_jobs.py b/scripts/run_jobs.py
index 5d5a87794..ff90765e5 100644
--- a/scripts/run_jobs.py
+++ b/scripts/run_jobs.py
@@ -3,7 +3,6 @@
 """
 
 import subprocess
-from typing import List
 
 import numpy as np
 
@@ -33,6 +32,6 @@
         "-f",
         log_folder,
     ]
-    arg_str_list: List[str] = list(map(str, args))
+    arg_str_list: list[str] = list(map(str, args))
 
     ok = subprocess.call(["python", "train.py", *arg_str_list])
diff --git a/setup.py b/setup.py
index fd98f84f0..6426672a8 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
 See https://github.com/DLR-RM/rl-baselines3-zoo
 """
 install_requires = [
-    "sb3_contrib>=2.4.0,<3.0",
+    "sb3_contrib>=2.5.0a0,<3.0",
     "gymnasium>=0.29.1,<1.1.0",
     "huggingface_sb3>=3.0,<4.0",
     "tqdm",
@@ -56,7 +56,7 @@
     long_description=long_description,
     long_description_content_type="text/markdown",
     version=__version__,
-    python_requires=">=3.8",
+    python_requires=">=3.9",
     # PyPI package information.
     project_urls={
         "Code": "https://github.com/DLR-RM/rl-baselines3-zoo",
@@ -68,10 +68,10 @@
     },
     classifiers=[
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
     ],
 )

From 18fec193141963d0ee9b4ad9464edae8258108fa Mon Sep 17 00:00:00 2001
From: Antonin RAFFIN
Date: Mon, 18 Nov 2024 16:00:27 +0100
Subject: [PATCH 2/4] Update trained agent CI too

---
 .github/workflows/trained_agents.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/trained_agents.yml b/.github/workflows/trained_agents.yml
index 8199ca671..de21319bd 100644
--- a/.github/workflows/trained_agents.yml
+++ b/.github/workflows/trained_agents.yml
@@ -20,7 +20,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
         include:
           # Default version
           - gymnasium-version: "1.0.0"
@@ -45,7 +45,6 @@ jobs:
           # See https://github.com/astral-sh/uv/issues/1497
          uv pip install --system torch==2.4.1+cpu --index https://download.pytorch.org/whl/cpu
          # Install full requirements (for additional envs and test tools)
-          # Install full requirements (for additional envs and test tools)
          uv pip install --system -r requirements.txt
          # Use headless version
          uv pip install --system opencv-python-headless
@@ -54,6 +53,7 @@ jobs:
       - name: Install specific version of gym
        run: |
          uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
+          uv pip install --system "numpy<2"
        # Only run for python 3.10, downgrade gym to 0.29.1
      - name: Check trained agents

From de1daa762646f1d9d8314898d1486510076e0a51 Mon Sep 17 00:00:00 2001
From: Antonin RAFFIN
Date: Mon, 18 Nov 2024 16:17:46 +0100
Subject: [PATCH 3/4] Add missing condition in CI

---
 .github/workflows/ci.yml             | 1 +
 .github/workflows/trained_agents.yml | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0c2cc3773..9050c29ab 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -53,6 +53,7 @@ jobs:
           uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
          uv pip install --system "numpy<2"
        # Only run for python 3.10, downgrade gym to 0.29.1
+        if: matrix.gymnasium-version != '1.0.0'
      - name: Lint with ruff
        run: |
diff --git a/.github/workflows/trained_agents.yml b/.github/workflows/trained_agents.yml
index de21319bd..a6b5ad8a8 100644
--- a/.github/workflows/trained_agents.yml
+++ b/.github/workflows/trained_agents.yml
@@ -55,6 +55,7 @@ jobs:
           uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
          uv pip install --system "numpy<2"
        # Only run for python 3.10, downgrade gym to 0.29.1
+        if: matrix.gymnasium-version != '1.0.0'
      - name: Check trained agents
        run: |

From e92ab553d72716696b1f0d8ad8336b43d537b021 Mon Sep 17 00:00:00 2001
From: Antonin RAFFIN
Date: Tue, 19 Nov 2024 11:12:26 +0100
Subject: [PATCH 4/4] Downgrade numpy for pybullet and add a notice

---
 .github/workflows/trained_agents.yml | 3 +++
 README.md                            | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/.github/workflows/trained_agents.yml b/.github/workflows/trained_agents.yml
index a6b5ad8a8..1993e55dc 100644
--- a/.github/workflows/trained_agents.yml
+++ b/.github/workflows/trained_agents.yml
@@ -49,6 +49,9 @@ jobs:
           # Use headless version
          uv pip install --system opencv-python-headless
          uv pip install --system -e .[plots,tests]
+          # Downgrade numpy to run pybullet agents
+          # See https://github.com/bulletphysics/bullet3/issues/4649
+          uv pip install --system "numpy<2"
      - name: Install specific version of gym
        run: |
diff --git a/README.md b/README.md
index 0b205060f..7c4c9ad74 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,8 @@ Goals of this repository:
 
 This is the SB3 version of the original SB2 [rl-zoo](https://github.com/araffin/rl-baselines-zoo).
 
+Note: although SB3 and the RL Zoo are compatible with Numpy>=2.0, you will need Numpy<2 to run agents on pybullet envs (see [issue](https://github.com/bulletphysics/bullet3/issues/4649)).
+
 ## Documentation
 
 Documentation is available online: [https://rl-baselines3-zoo.readthedocs.io/](https://rl-baselines3-zoo.readthedocs.io)