From e5a102803c8af2c4f46d4d55148b9f4fee0f4cb7 Mon Sep 17 00:00:00 2001
From: Antonin RAFFIN
Date: Mon, 18 Nov 2024 15:56:05 +0100
Subject: [PATCH 1/4] Drop python 3.8, add python 3.12 support

---
 .github/workflows/ci.yml       |  5 ++--
 CHANGELOG.md                   | 16 ++++++++++++
 docs/conf.py                   |  3 +--
 pyproject.toml                 |  4 +--
 requirements.txt               |  2 +-
 rl_zoo3/benchmark.py           |  3 +--
 rl_zoo3/callbacks.py           |  4 +--
 rl_zoo3/exp_manager.py         | 48 +++++++++++++++++-----------------
 rl_zoo3/hyperparams_opt.py     | 26 +++++++++---------
 rl_zoo3/push_to_hub.py         | 16 ++++++------
 rl_zoo3/utils.py               | 28 ++++++++++----------
 rl_zoo3/version.txt            |  2 +-
 rl_zoo3/wrappers.py            | 14 +++++-----
 scripts/create_cluster_jobs.py |  3 +--
 scripts/run_jobs.py            |  3 +--
 setup.py                       |  6 ++---
 16 files changed, 97 insertions(+), 86 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 528e7fa2c..0c2cc3773 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -19,7 +19,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
         include:
           # Default version
           - gymnasium-version: "1.0.0"
@@ -51,6 +51,7 @@ jobs:
       - name: Install specific version of gym
        run: |
          uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
+          uv pip install --system "numpy<2"
        # Only run for python 3.10, downgrade gym to 0.29.1
      - name: Lint with ruff
        run: |
@@ -65,8 +66,6 @@ jobs:
       - name: Type check
        run: |
          make type
-        # Do not run for python 3.8 (mypy internal error)
-        if: matrix.python-version != '3.8'
      - name: Test with pytest
        run: |
          make pytest
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 773e72b6a..acdf4ed5e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,19 @@
+## Release 2.5.0a0 (WIP)
+
+### Breaking Changes
+- Upgraded to Pytorch >= 2.3.0
+- Upgraded to SB3 >= 2.5.0
+
+### New Features
+- Added support for Numpy v2
+
+### Bug fixes
+
+### Documentation
+
+### Other
+
+
 ## Release 2.4.0 (2024-11-18)
 
 **New algorithm: CrossQ, Gymnasium v1.0 support, and better defaults for SAC/TQC on Swimmer-v4 env**
diff --git a/docs/conf.py b/docs/conf.py
index 113067604..2ea8f7557 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -14,7 +14,6 @@
 import datetime
 import os
 import sys
-from typing import Dict
 
 # We CANNOT enable 'sphinxcontrib.spelling' because ReadTheDocs.org does not support
 # PyEnchant.
@@ -151,7 +150,7 @@ def setup(app):
 
 # -- Options for LaTeX output ------------------------------------------------
 
-latex_elements: Dict[str, str] = {
+latex_elements: dict[str, str] = {
     # The paper size ('letterpaper' or 'a4paper').
     #
     # 'papersize': 'letterpaper',
diff --git a/pyproject.toml b/pyproject.toml
index b00654161..16072fc30 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,8 +1,8 @@
 [tool.ruff]
 # Same as Black.
 line-length = 127
-# Assume Python 3.8
-target-version = "py38"
+# Assume Python 3.9
+target-version = "py39"
 
 [tool.ruff.lint]
 # See https://beta.ruff.rs/docs/rules/
diff --git a/requirements.txt b/requirements.txt
index acacb450e..97804065d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 gym==0.26.2
-stable-baselines3[extra,tests,docs]>=2.4.0,<3.0
+stable-baselines3[extra,tests,docs]>=2.5.0a0,<3.0
 box2d-py==2.3.8
 pybullet_envs_gymnasium>=0.5.0
 # minigrid
diff --git a/rl_zoo3/benchmark.py b/rl_zoo3/benchmark.py
index 8c7d8a210..22858482a 100644
--- a/rl_zoo3/benchmark.py
+++ b/rl_zoo3/benchmark.py
@@ -3,7 +3,6 @@
 import os
 import shutil
 import subprocess
-from typing import Dict, List
 
 import numpy as np
 import pandas as pd
@@ -33,7 +32,7 @@
 trained_models.update(get_hf_trained_models())
 
 n_experiments = len(trained_models)
-results: Dict[str, List] = {
+results: dict[str, list] = {
     "algo": [],
     "env_id": [],
     "mean_reward": [],
diff --git a/rl_zoo3/callbacks.py b/rl_zoo3/callbacks.py
index 4bfdd2381..d260a91ca 100644
--- a/rl_zoo3/callbacks.py
+++ b/rl_zoo3/callbacks.py
@@ -4,7 +4,7 @@
 from copy import deepcopy
 from functools import wraps
 from threading import Thread
-from typing import Optional, Type, Union
+from typing import Optional, Union
 
 import optuna
 from sb3_contrib import TQC
@@ -119,7 +119,7 @@ def __init__(self, gradient_steps: int = 100, verbose: int = 0, sleep_time: floa
         self._model: Union[SAC, TQC]
         self.gradient_steps = gradient_steps
         self.process: Thread
-        self.model_class: Union[Type[SAC], Type[TQC]]
+        self.model_class: Union[type[SAC], type[TQC]]
         self.sleep_time = sleep_time
 
     def _init_callback(self) -> None:
diff --git a/rl_zoo3/exp_manager.py b/rl_zoo3/exp_manager.py
index b61786f72..321a0378a 100644
--- a/rl_zoo3/exp_manager.py
+++ b/rl_zoo3/exp_manager.py
@@ -7,7 +7,7 @@
 from collections import OrderedDict
 from pathlib import Path
 from pprint import pprint
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Optional, Union
 
 import gymnasium as gym
 import numpy as np
@@ -71,9 +71,9 @@ def __init__(
         eval_freq: int = 10000,
         n_eval_episodes: int = 5,
         save_freq: int = -1,
-        hyperparams: Optional[Dict[str, Any]] = None,
-        env_kwargs: Optional[Dict[str, Any]] = None,
-        eval_env_kwargs: Optional[Dict[str, Any]] = None,
+        hyperparams: Optional[dict[str, Any]] = None,
+        env_kwargs: Optional[dict[str, Any]] = None,
+        eval_env_kwargs: Optional[dict[str, Any]] = None,
         trained_agent: str = "",
         optimize_hyperparameters: bool = False,
         storage: Optional[str] = None,
@@ -112,10 +112,10 @@ def __init__(
         default_path = Path(__file__).parent.parent
 
         self.config = config or str(default_path / f"hyperparams/{self.algo}.yml")
-        self.env_kwargs: Dict[str, Any] = env_kwargs or {}
+        self.env_kwargs: dict[str, Any] = env_kwargs or {}
         self.n_timesteps = n_timesteps
         self.normalize = False
-        self.normalize_kwargs: Dict[str, Any] = {}
+        self.normalize_kwargs: dict[str, Any] = {}
         self.env_wrapper: Optional[Callable] = None
         self.frame_stack = None
         self.seed = seed
@@ -124,14 +124,14 @@ def __init__(
         self.vec_env_class = {"dummy": DummyVecEnv, "subproc": SubprocVecEnv}[vec_env_type]
         self.vec_env_wrapper: Optional[Callable] = None
 
-        self.vec_env_kwargs: Dict[str, Any] = {}
+        self.vec_env_kwargs: dict[str, Any] = {}
         # self.vec_env_kwargs = {} if vec_env_type == "dummy" else {"start_method": "fork"}
 
         # Callbacks
-        self.specified_callbacks: List = []
-        self.callbacks: List[BaseCallback] = []
+        self.specified_callbacks: list = []
+        self.callbacks: list[BaseCallback] = []
         # Use env-kwargs if eval_env_kwargs was not specified
-        self.eval_env_kwargs: Dict[str, Any] = eval_env_kwargs or self.env_kwargs
+        self.eval_env_kwargs: dict[str, Any] = eval_env_kwargs or self.env_kwargs
         self.save_freq = save_freq
         self.eval_freq = eval_freq
         self.n_eval_episodes = n_eval_episodes
@@ -139,8 +139,8 @@ def __init__(
         self.n_envs = 1  # it will be updated when reading hyperparams
         self.n_actions = 0  # For DDPG/TD3 action noise objects
-        self._hyperparams: Dict[str, Any] = {}
-        self.monitor_kwargs: Dict[str, Any] = {}
+        self._hyperparams: dict[str, Any] = {}
+        self.monitor_kwargs: dict[str, Any] = {}
 
         self.trained_agent = trained_agent
         self.continue_training = trained_agent.endswith(".zip") and os.path.isfile(trained_agent)
 
@@ -179,7 +179,7 @@ def __init__(
         )
         self.params_path = f"{self.save_path}/{self.env_name}"
 
-    def setup_experiment(self) -> Optional[Tuple[BaseAlgorithm, Dict[str, Any]]]:
+    def setup_experiment(self) -> Optional[tuple[BaseAlgorithm, dict[str, Any]]]:
         """
         Read hyperparameters, pre-process them (create schedules, wrappers, callbacks, action noise objects)
         create the environment and possibly the model.
@@ -223,7 +223,7 @@ def learn(self, model: BaseAlgorithm) -> None:
         """
         :param model: an initialized RL model
         """
-        kwargs: Dict[str, Any] = {}
+        kwargs: dict[str, Any] = {}
         if self.log_interval > -1:
             kwargs = {"log_interval": self.log_interval}
 
@@ -272,7 +272,7 @@ def save_trained_model(self, model: BaseAlgorithm) -> None:
             assert vec_normalize is not None
             vec_normalize.save(os.path.join(self.params_path, "vecnormalize.pkl"))
 
-    def _save_config(self, saved_hyperparams: Dict[str, Any]) -> None:
+    def _save_config(self, saved_hyperparams: dict[str, Any]) -> None:
         """
         Save unprocessed hyperparameters, this can be use later
         to reproduce an experiment.
@@ -290,7 +290,7 @@ def _save_config(self, saved_hyperparams: Dict[str, Any]) -> None:
 
         print(f"Log path: {self.save_path}")
 
-    def read_hyperparameters(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+    def read_hyperparameters(self) -> tuple[dict[str, Any], dict[str, Any]]:
         print(f"Loading hyperparameters from: {self.config}")
 
         if self.config.endswith(".yml") or self.config.endswith(".yaml"):
@@ -298,7 +298,7 @@ def read_hyperparameters(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
             with open(self.config) as f:
                 hyperparams_dict = yaml.safe_load(f)
         elif self.config.endswith(".py"):
-            global_variables: Dict = {}
+            global_variables: dict = {}
             # Load hyperparameters from python file
             exec(Path(self.config).read_text(), global_variables)
             hyperparams_dict = global_variables["hyperparams"]
@@ -327,7 +327,7 @@ def read_hyperparameters(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
         return hyperparams, saved_hyperparams
 
     @staticmethod
-    def _preprocess_schedules(hyperparams: Dict[str, Any]) -> Dict[str, Any]:
+    def _preprocess_schedules(hyperparams: dict[str, Any]) -> dict[str, Any]:
         # Create schedules
         for key in ["learning_rate", "clip_range", "clip_range_vf", "delta_std"]:
             if key not in hyperparams:
@@ -345,7 +345,7 @@ def _preprocess_schedules(hyperparams: Dict[str, Any]) -> Dict[str, Any]:
                 raise ValueError(f"Invalid value for {key}: {hyperparams[key]}")
         return hyperparams
 
-    def _preprocess_normalization(self, hyperparams: Dict[str, Any]) -> Dict[str, Any]:
+    def _preprocess_normalization(self, hyperparams: dict[str, Any]) -> dict[str, Any]:
         if "normalize" in hyperparams.keys():
             self.normalize = hyperparams["normalize"]
 
@@ -370,8 +370,8 @@ def _preprocess_normalization(self, hyperparams: Dict[str, Any]) -> Dict[str, An
         return hyperparams
 
     def _preprocess_hyperparams(  # noqa: C901
-        self, hyperparams: Dict[str, Any]
-    ) -> Tuple[Dict[str, Any], Optional[Callable], List[BaseCallback], Optional[Callable]]:
+        self, hyperparams: dict[str, Any]
+    ) -> tuple[dict[str, Any], Optional[Callable], list[BaseCallback], Optional[Callable]]:
         self.n_envs = hyperparams.get("n_envs", 1)
 
         if self.verbose > 0:
@@ -448,8 +448,8 @@ def _preprocess_hyperparams(  # noqa: C901
         return hyperparams, env_wrapper, callbacks, vec_env_wrapper
 
     def _preprocess_action_noise(
-        self, hyperparams: Dict[str, Any], saved_hyperparams: Dict[str, Any], env: VecEnv
-    ) -> Dict[str, Any]:
+        self, hyperparams: dict[str, Any], saved_hyperparams: dict[str, Any], env: VecEnv
+    ) -> dict[str, Any]:
         # Parse noise string
         # Note: only off-policy algorithms are supported
         if hyperparams.get("noise_type") is not None:
@@ -667,7 +667,7 @@ def make_env(**kwargs) -> gym.Env:
 
         return env
 
-    def _load_pretrained_agent(self, hyperparams: Dict[str, Any], env: VecEnv) -> BaseAlgorithm:
+    def _load_pretrained_agent(self, hyperparams: dict[str, Any], env: VecEnv) -> BaseAlgorithm:
         # Continue training
         print("Loading pretrained agent")
         # Policy should not be changed
diff --git a/rl_zoo3/hyperparams_opt.py b/rl_zoo3/hyperparams_opt.py
index 1ff6708a0..4b77db4a0 100644
--- a/rl_zoo3/hyperparams_opt.py
+++ b/rl_zoo3/hyperparams_opt.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict
+from typing import Any
 
 import numpy as np
 import optuna
@@ -8,7 +8,7 @@
 from rl_zoo3 import linear_schedule
 
 
-def sample_ppo_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_ppo_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for PPO hyperparams.
 
@@ -76,7 +76,7 @@ def sample_ppo_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
     }
 
 
-def sample_ppo_lstm_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_ppo_lstm_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for RecurrentPPO hyperparams.
     uses sample_ppo_params(), this function samples for the policy_kwargs
@@ -98,7 +98,7 @@ def sample_ppo_lstm_params(trial: optuna.Trial, n_actions: int, n_envs: int, add
     return hyperparams
 
 
-def sample_trpo_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_trpo_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for TRPO hyperparams.
 
@@ -165,7 +165,7 @@ def sample_trpo_params(trial: optuna.Trial, n_actions: int, n_envs: int, additio
     }
 
 
-def sample_a2c_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_a2c_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for A2C hyperparams.
 
@@ -229,7 +229,7 @@ def sample_a2c_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
     }
 
 
-def sample_sac_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_sac_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for SAC hyperparams.
 
@@ -290,7 +290,7 @@ def sample_sac_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
     return hyperparams
 
 
-def sample_td3_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_td3_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for TD3 hyperparams.
 
@@ -346,7 +346,7 @@ def sample_td3_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
     return hyperparams
 
 
-def sample_ddpg_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_ddpg_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for DDPG hyperparams.
 
@@ -400,7 +400,7 @@ def sample_ddpg_params(trial: optuna.Trial, n_actions: int, n_envs: int, additio
     return hyperparams
 
 
-def sample_dqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_dqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for DQN hyperparams.
 
@@ -444,7 +444,7 @@ def sample_dqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
     return hyperparams
 
 
-def sample_her_params(trial: optuna.Trial, hyperparams: Dict[str, Any], her_kwargs: Dict[str, Any]) -> Dict[str, Any]:
+def sample_her_params(trial: optuna.Trial, hyperparams: dict[str, Any], her_kwargs: dict[str, Any]) -> dict[str, Any]:
     """
     Sampler for HerReplayBuffer hyperparams.
 
@@ -461,7 +461,7 @@ def sample_her_params(trial: optuna.Trial, hyperparams: Dict[str, Any], her_kwar
     return hyperparams
 
 
-def sample_tqc_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_tqc_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for TQC hyperparams.
 
@@ -480,7 +480,7 @@ def sample_tqc_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
     return hyperparams
 
 
-def sample_qrdqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_qrdqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for QR-DQN hyperparams.
 
@@ -496,7 +496,7 @@ def sample_qrdqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, additi
     return hyperparams
 
 
-def sample_ars_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
+def sample_ars_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
     """
     Sampler for ARS hyperparams.
     :param trial:
diff --git a/rl_zoo3/push_to_hub.py b/rl_zoo3/push_to_hub.py
index ede7d9f77..499bcc366 100644
--- a/rl_zoo3/push_to_hub.py
+++ b/rl_zoo3/push_to_hub.py
@@ -6,7 +6,7 @@
 from copy import deepcopy
 from pathlib import Path
 from pprint import pformat
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Optional
 
 import torch as th
 import yaml
@@ -27,7 +27,7 @@
 msg = Printer()
 
 
-def save_model_card(repo_dir: Path, generated_model_card: str, metadata: Dict[str, Any]) -> None:
+def save_model_card(repo_dir: Path, generated_model_card: str, metadata: dict[str, Any]) -> None:
     """Saves a model card for the repository.
 
     :param repo_dir: repository directory
@@ -50,9 +50,9 @@ def generate_model_card(
     env_id: str,
     mean_reward: float,
     std_reward: float,
-    hyperparams: Dict[str, Any],
-    env_kwargs: Dict[str, Any],
-) -> Tuple[str, Dict[str, Any]]:
+    hyperparams: dict[str, Any],
+    env_kwargs: dict[str, Any],
+) -> tuple[str, dict[str, Any]]:
     """
     Generate the model card for the Hub
 
@@ -131,8 +131,8 @@ def package_to_hub(
     algo_name: str,
     algo_class_name: str,
     log_path: Path,
-    hyperparams: Dict[str, Any],
-    env_kwargs: Dict[str, Any],
+    hyperparams: dict[str, Any],
+    env_kwargs: dict[str, Any],
     env_name: EnvironmentName,
     eval_env: VecEnv,
     repo_id: ModelRepoId,
@@ -394,7 +394,7 @@ def package_to_hub(
 
     # Note: we assume that we push models using the same machine (same python version)
     # that trained them, if not, we would need to pass custom object as in enjoy.py
-    custom_objects: Dict[str, Any] = {}
+    custom_objects: dict[str, Any] = {}
     model = ALGOS[algo].load(model_path, env=eval_env, custom_objects=custom_objects, device=args.device, **kwargs)
 
     # Deterministic by default except for atari games
diff --git a/rl_zoo3/utils.py b/rl_zoo3/utils.py
index 4b270e280..30d557945 100644
--- a/rl_zoo3/utils.py
+++ b/rl_zoo3/utils.py
@@ -3,7 +3,7 @@
 import importlib
 import os
 from copy import deepcopy
-from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Callable, Optional, Union
 
 import gymnasium as gym
 import stable_baselines3 as sb3  # noqa: F401
@@ -23,7 +23,7 @@
 # For custom activation fn
 from torch import nn as nn
 
-ALGOS: Dict[str, Type[BaseAlgorithm]] = {
+ALGOS: dict[str, type[BaseAlgorithm]] = {
     "a2c": A2C,
     "ddpg": DDPG,
     "dqn": DQN,
@@ -45,7 +45,7 @@ def flatten_dict_observations(env: gym.Env) -> gym.Env:
     return gym.wrappers.FlattenObservation(env)
 
 
-def get_wrapper_class(hyperparams: Dict[str, Any], key: str = "env_wrapper") -> Optional[Callable[[gym.Env], gym.Env]]:
+def get_wrapper_class(hyperparams: dict[str, Any], key: str = "env_wrapper") -> Optional[Callable[[gym.Env], gym.Env]]:
     """
     Get one or more Gym environment wrapper class specified as a hyper parameter "env_wrapper".
 
@@ -118,7 +118,7 @@ def wrap_env(env: gym.Env) -> gym.Env:
     return None
 
 
-def get_class_by_name(name: str) -> Type:
+def get_class_by_name(name: str) -> type:
     """
     Imports and returns a class given the name, e.g. passing
     'stable_baselines3.common.callbacks.CheckpointCallback' returns the
@@ -138,7 +138,7 @@ def get_class_name(name: str) -> str:
     return getattr(module, get_class_name(name))
 
 
-def get_callback_list(hyperparams: Dict[str, Any]) -> List[BaseCallback]:
+def get_callback_list(hyperparams: dict[str, Any]) -> list[BaseCallback]:
     """
     Get one or more Callback class specified as a hyper-parameter "callback".
 
@@ -155,7 +155,7 @@ def get_callback_list(hyperparams: Dict[str, Any]) -> List[BaseCallback]:
     :return:
     """
 
-    callbacks: List[BaseCallback] = []
+    callbacks: list[BaseCallback] = []
 
     if "callback" in hyperparams.keys():
         callback_name = hyperparams.get("callback")
@@ -196,8 +196,8 @@ def create_test_env(
     seed: int = 0,
     log_dir: Optional[str] = None,
     should_render: bool = True,
-    hyperparams: Optional[Dict[str, Any]] = None,
-    env_kwargs: Optional[Dict[str, Any]] = None,
+    hyperparams: Optional[dict[str, Any]] = None,
+    env_kwargs: Optional[dict[str, Any]] = None,
 ) -> VecEnv:
     """
     Create environment for testing a trained agent
@@ -221,7 +221,7 @@ def create_test_env(
     if "env_wrapper" in hyperparams.keys():
         del hyperparams["env_wrapper"]
 
-    vec_env_kwargs: Dict[str, Any] = {}
+    vec_env_kwargs: dict[str, Any] = {}
     # Avoid potential shared memory issue
     vec_env_cls = SubprocVecEnv if n_envs > 1 else DummyVecEnv
 
@@ -299,7 +299,7 @@ def func(progress_remaining: float) -> float:
     return func
 
 
-def get_trained_models(log_folder: str) -> Dict[str, Tuple[str, str]]:
+def get_trained_models(log_folder: str) -> dict[str, tuple[str, str]]:
     """
     :param log_folder: Root log folder
     :return: Dict representing the trained agents
@@ -320,7 +320,7 @@ def get_trained_models(log_folder: str) -> Dict[str, Tuple[str, str]]:
     return trained_models
 
 
-def get_hf_trained_models(organization: str = "sb3", check_filename: bool = False) -> Dict[str, Tuple[str, str]]:
+def get_hf_trained_models(organization: str = "sb3", check_filename: bool = False) -> dict[str, tuple[str, str]]:
     """
     Get pretrained models, available on the Hugginface hub for a given organization.
 
@@ -382,7 +382,7 @@ def get_saved_hyperparams(
     stats_path: str,
     norm_reward: bool = False,
     test_mode: bool = False,
-) -> Tuple[Dict[str, Any], Optional[str]]:
+) -> tuple[dict[str, Any], Optional[str]]:
     """
     Retrieve saved hyperparameters given a path.
     Return empty dict and None if the path is not valid.
@@ -392,7 +392,7 @@ def get_saved_hyperparams(
     :param test_mode:
     :return:
     """
-    hyperparams: Dict[str, Any] = {}
+    hyperparams: dict[str, Any] = {}
     if not os.path.isdir(stats_path):
         return hyperparams, None
     else:
@@ -448,7 +448,7 @@ def get_model_path(
     load_best: bool = False,
     load_checkpoint: Optional[str] = None,
     load_last_checkpoint: bool = False,
-) -> Tuple[str, str, str]:
+) -> tuple[str, str, str]:
     if exp_id == 0:
         exp_id = get_latest_run_id(os.path.join(folder, algo), env_name)
         print(f"Loading latest experiment, id={exp_id}")
diff --git a/rl_zoo3/version.txt b/rl_zoo3/version.txt
index 197c4d5c2..b8feefb94 100644
--- a/rl_zoo3/version.txt
+++ b/rl_zoo3/version.txt
@@ -1 +1 @@
-2.4.0
+2.5.0a0
diff --git a/rl_zoo3/wrappers.py b/rl_zoo3/wrappers.py
index c9b7810ed..e820fc6b0 100644
--- a/rl_zoo3/wrappers.py
+++ b/rl_zoo3/wrappers.py
@@ -1,4 +1,4 @@
-from typing import Any, ClassVar, Dict, Optional, SupportsFloat, Tuple
+from typing import Any, ClassVar, Optional, SupportsFloat
 
 import gymnasium as gym
 import numpy as np
@@ -54,7 +54,7 @@ def __init__(self, env: gym.Env, noise_std: float = 0.1):
         super().__init__(env)
         self.noise_std = noise_std
 
-    def step(self, action: np.ndarray) -> Tuple[ObsType, SupportsFloat, bool, bool, Dict[str, Any]]:
+    def step(self, action: np.ndarray) -> tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]:
         assert isinstance(self.action_space, spaces.Box)
         noise = np.random.normal(np.zeros_like(action), np.ones_like(action) * self.noise_std)
         noisy_action = np.clip(action + noise, self.action_space.low, self.action_space.high)
@@ -165,7 +165,7 @@ def __init__(self, env: gym.Env, horizon: int = 2):
     def _create_obs_from_history(self) -> np.ndarray:
         return np.concatenate((self.obs_history, self.action_history))
 
-    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> Tuple[np.ndarray, Dict]:
+    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[np.ndarray, dict]:
         # Flush the history
         self.obs_history[...] = 0
         self.action_history[...] = 0
@@ -174,7 +174,7 @@ def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> T
         self.obs_history[..., -obs.shape[-1] :] = obs
         return self._create_obs_from_history(), info
 
-    def step(self, action) -> Tuple[np.ndarray, SupportsFloat, bool, bool, Dict]:
+    def step(self, action) -> tuple[np.ndarray, SupportsFloat, bool, bool, dict]:
         obs, reward, terminated, truncated, info = self.env.step(action)
         last_ax_size = obs.shape[-1]
 
@@ -230,7 +230,7 @@ def __init__(self, env: gym.Env, horizon: int = 2):
     def _create_obs_from_history(self) -> np.ndarray:
         return np.concatenate((self.obs_history, self.action_history))
 
-    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> Tuple[Dict[str, np.ndarray], Dict]:
+    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple[dict[str, np.ndarray], dict]:
         # Flush the history
         self.obs_history[...] = 0
         self.action_history[...] = 0
@@ -243,7 +243,7 @@ def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> T
 
         return obs_dict, info
 
-    def step(self, action) -> Tuple[Dict[str, np.ndarray], SupportsFloat, bool, bool, Dict]:
+    def step(self, action) -> tuple[dict[str, np.ndarray], SupportsFloat, bool, bool, dict]:
         obs_dict, reward, terminated, truncated, info = self.env.step(action)
         obs = obs_dict["observation"]
         last_ax_size = obs.shape[-1]
@@ -299,7 +299,7 @@ class MaskVelocityWrapper(gym.ObservationWrapper):
     """
 
     # Supported envs
-    velocity_indices: ClassVar[Dict[str, np.ndarray]] = {
+    velocity_indices: ClassVar[dict[str, np.ndarray]] = {
         "CartPole-v1": np.array([1, 3]),
         "MountainCar-v0": np.array([1]),
        "MountainCarContinuous-v0": np.array([1]),
diff --git a/scripts/create_cluster_jobs.py b/scripts/create_cluster_jobs.py
index ed84627c9..b7b523fe1 100644
--- a/scripts/create_cluster_jobs.py
+++ b/scripts/create_cluster_jobs.py
@@ -5,7 +5,6 @@
 import os
 import subprocess
 import time
-from typing import List
 
 import numpy as np
 
@@ -35,7 +34,7 @@
         log_folder,
         "-uuid",
     ]
-    arg_str_list: List[str] = list(map(str, args))
+    arg_str_list: list[str] = list(map(str, args))
 
     command = " ".join(["python", "-u", "train.py", *arg_str_list])
 
diff --git a/scripts/run_jobs.py b/scripts/run_jobs.py
index 5d5a87794..ff90765e5 100644
--- a/scripts/run_jobs.py
+++ b/scripts/run_jobs.py
@@ -3,7 +3,6 @@
 """
 
 import subprocess
-from typing import List
 
 import numpy as np
 
@@ -33,6 +32,6 @@
         "-f",
         log_folder,
     ]
-    arg_str_list: List[str] = list(map(str, args))
+    arg_str_list: list[str] = list(map(str, args))
 
     ok = subprocess.call(["python", "train.py", *arg_str_list])
diff --git a/setup.py b/setup.py
index fd98f84f0..6426672a8 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
 See https://github.com/DLR-RM/rl-baselines3-zoo
 """
 install_requires = [
-    "sb3_contrib>=2.4.0,<3.0",
+    "sb3_contrib>=2.5.0a0,<3.0",
     "gymnasium>=0.29.1,<1.1.0",
     "huggingface_sb3>=3.0,<4.0",
     "tqdm",
@@ -56,7 +56,7 @@
     long_description=long_description,
     long_description_content_type="text/markdown",
     version=__version__,
-    python_requires=">=3.8",
+    python_requires=">=3.9",
     # PyPI package information.
     project_urls={
         "Code": "https://github.com/DLR-RM/rl-baselines3-zoo",
@@ -68,10 +68,10 @@
     },
     classifiers=[
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
     ],
 )

From 18fec193141963d0ee9b4ad9464edae8258108fa Mon Sep 17 00:00:00 2001
From: Antonin RAFFIN
Date: Mon, 18 Nov 2024 16:00:27 +0100
Subject: [PATCH 2/4] Update trained agent CI too

---
 .github/workflows/trained_agents.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/trained_agents.yml b/.github/workflows/trained_agents.yml
index 8199ca671..de21319bd 100644
--- a/.github/workflows/trained_agents.yml
+++ b/.github/workflows/trained_agents.yml
@@ -20,7 +20,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
         include:
           # Default version
           - gymnasium-version: "1.0.0"
@@ -45,7 +45,6 @@ jobs:
           # See https://github.com/astral-sh/uv/issues/1497
          uv pip install --system torch==2.4.1+cpu --index https://download.pytorch.org/whl/cpu
          # Install full requirements (for additional envs and test tools)
-          # Install full requirements (for additional envs and test tools)
          uv pip install --system -r requirements.txt
          # Use headless version
          uv pip install --system opencv-python-headless
@@ -54,6 +53,7 @@ jobs:
       - name: Install specific version of gym
        run: |
          uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
+          uv pip install --system "numpy<2"
        # Only run for python 3.10, downgrade gym to 0.29.1
      - name: Check trained agents

From de1daa762646f1d9d8314898d1486510076e0a51 Mon Sep 17 00:00:00 2001
From: Antonin RAFFIN
Date: Mon, 18 Nov 2024 16:17:46 +0100
Subject: [PATCH 3/4] Add missing condition in CI

---
 .github/workflows/ci.yml             | 1 +
 .github/workflows/trained_agents.yml | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0c2cc3773..9050c29ab 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -53,6 +53,7 @@ jobs:
           uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
          uv pip install --system "numpy<2"
        # Only run for python 3.10, downgrade gym to 0.29.1
+        if: matrix.gymnasium-version != '1.0.0'
      - name: Lint with ruff
        run: |
diff --git a/.github/workflows/trained_agents.yml b/.github/workflows/trained_agents.yml
index de21319bd..a6b5ad8a8 100644
--- a/.github/workflows/trained_agents.yml
+++ b/.github/workflows/trained_agents.yml
@@ -55,6 +55,7 @@ jobs:
           uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
          uv pip install --system "numpy<2"
        # Only run for python 3.10, downgrade gym to 0.29.1
+        if: matrix.gymnasium-version != '1.0.0'
      - name: Check trained agents
        run: |

From e92ab553d72716696b1f0d8ad8336b43d537b021 Mon Sep 17 00:00:00 2001
From: Antonin RAFFIN
Date: Tue, 19 Nov 2024 11:12:26 +0100
Subject: [PATCH 4/4] Downgrade numpy for pybullet and add a notice

---
 .github/workflows/trained_agents.yml | 3 +++
 README.md                            | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/.github/workflows/trained_agents.yml b/.github/workflows/trained_agents.yml
index a6b5ad8a8..1993e55dc 100644
--- a/.github/workflows/trained_agents.yml
+++ b/.github/workflows/trained_agents.yml
@@ -49,6 +49,9 @@ jobs:
           # Use headless version
          uv pip install --system opencv-python-headless
          uv pip install --system -e .[plots,tests]
+          # Downgrade numpy to run pybullet agents
+          # See https://github.com/bulletphysics/bullet3/issues/4649
+          uv pip install --system "numpy<2"
      - name: Install specific version of gym
        run: |
diff --git a/README.md b/README.md
index 0b205060f..7c4c9ad74 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,8 @@ Goals of this repository:
 
 This is the SB3 version of the original SB2 [rl-zoo](https://github.com/araffin/rl-baselines-zoo).
 
+Note: although SB3 and the RL Zoo are compatible with Numpy>=2.0, you will need Numpy<2 to run agents on pybullet envs (see [issue](https://github.com/bulletphysics/bullet3/issues/4649)).
+
 ## Documentation
 
 Documentation is available online: [https://rl-baselines3-zoo.readthedocs.io/](https://rl-baselines3-zoo.readthedocs.io)