Drop python 3.8, add python 3.12 support (#477)
* Drop python 3.8, add python 3.12 support

* Update trained agent CI too

* Add missing condition in CI

* Downgrade numpy for pybullet and add a notice
araffin authored Nov 19, 2024
1 parent b8ff1a6 commit 633954f
Showing 18 changed files with 106 additions and 88 deletions.
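Most of the Python changes below swap `typing.Dict`/`List`/`Tuple`/`Type` for the built-in generics of PEP 585, which require Python 3.9+ and therefore become usable once Python 3.8 is dropped. A minimal before/after sketch (the `load_results` helper is hypothetical, for illustration only, not code from this repository):

```python
from typing import Any, Optional

# Before (Python 3.8): containers had to come from typing
#     from typing import Dict
#     def load_results(path: str, defaults: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: ...

# After (Python 3.9+, PEP 585): built-in containers are generic themselves
def load_results(path: str, defaults: Optional[dict[str, Any]] = None) -> dict[str, Any]:
    """Hypothetical helper, shown only to illustrate the annotation change."""
    return defaults or {}
```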
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
@@ -19,7 +19,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
include:
# Default version
- gymnasium-version: "1.0.0"
@@ -51,7 +51,9 @@ jobs:
- name: Install specific version of gym
run: |
uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
uv pip install --system "numpy<2"
# Only run for python 3.10, downgrade gym to 0.29.1
if: matrix.gymnasium-version != '1.0.0'

- name: Lint with ruff
run: |
@@ -65,8 +67,6 @@
- name: Type check
run: |
make type
# Do not run for python 3.8 (mypy internal error)
if: matrix.python-version != '3.8'
- name: Test with pytest
run: |
make pytest
8 changes: 6 additions & 2 deletions .github/workflows/trained_agents.yml
@@ -20,7 +20,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
include:
# Default version
- gymnasium-version: "1.0.0"
@@ -45,16 +45,20 @@
# See https://github.com/astral-sh/uv/issues/1497
uv pip install --system torch==2.4.1+cpu --index https://download.pytorch.org/whl/cpu
# Install full requirements (for additional envs and test tools)
uv pip install --system -r requirements.txt
# Use headless version
uv pip install --system opencv-python-headless
uv pip install --system -e .[plots,tests]
# Downgrade numpy to run pybullet agents
# See https://github.com/bulletphysics/bullet3/issues/4649
uv pip install --system "numpy<2"
- name: Install specific version of gym
run: |
uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
uv pip install --system "numpy<2"
# Only run for python 3.10, downgrade gym to 0.29.1
if: matrix.gymnasium-version != '1.0.0'

- name: Check trained agents
run: |
16 changes: 16 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,19 @@
## Release 2.5.0a0 (WIP)

### Breaking Changes
- Upgraded to PyTorch >= 2.3.0
- Upgraded to SB3 >= 2.5.0

### New Features
- Added support for Numpy v2

### Bug fixes

### Documentation

### Other


## Release 2.4.0 (2024-11-18)

**New algorithm: CrossQ, Gymnasium v1.0 support, and better defaults for SAC/TQC on Swimmer-v4 env**
2 changes: 2 additions & 0 deletions README.md
@@ -27,6 +27,8 @@ Goals of this repository:

This is the SB3 version of the original SB2 [rl-zoo](https://github.com/araffin/rl-baselines-zoo).

Note: although SB3 and the RL Zoo are compatible with NumPy >= 2.0, you will need NumPy < 2 to run agents on PyBullet envs (see [issue](https://github.com/bulletphysics/bullet3/issues/4649)).

## Documentation

Documentation is available online: [https://rl-baselines3-zoo.readthedocs.io/](https://rl-baselines3-zoo.readthedocs.io)
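The same constraint applies outside CI: a minimal sketch of failing fast when NumPy 2.x is installed alongside PyBullet envs (this guard is an illustration of the pin described above, not part of this commit or the RL Zoo code):

```python
import numpy as np

# PyBullet envs are not yet compatible with NumPy 2.x,
# see https://github.com/bulletphysics/bullet3/issues/4649
if int(np.__version__.split(".")[0]) >= 2:
    raise RuntimeError("PyBullet agents need numpy<2, e.g. run: pip install 'numpy<2'")
```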
3 changes: 1 addition & 2 deletions docs/conf.py
@@ -14,7 +14,6 @@
import datetime
import os
import sys
from typing import Dict

# We CANNOT enable 'sphinxcontrib.spelling' because ReadTheDocs.org does not support
# PyEnchant.
@@ -151,7 +150,7 @@ def setup(app):

# -- Options for LaTeX output ------------------------------------------------

latex_elements: Dict[str, str] = {
latex_elements: dict[str, str] = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -1,8 +1,8 @@
[tool.ruff]
# Same as Black.
line-length = 127
# Assume Python 3.8
target-version = "py38"
# Assume Python 3.9
target-version = "py39"

[tool.ruff.lint]
# See https://beta.ruff.rs/docs/rules/
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,5 +1,5 @@
gym==0.26.2
stable-baselines3[extra,tests,docs]>=2.4.0,<3.0
stable-baselines3[extra,tests,docs]>=2.5.0a0,<3.0
box2d-py==2.3.8
pybullet_envs_gymnasium>=0.5.0
# minigrid
3 changes: 1 addition & 2 deletions rl_zoo3/benchmark.py
@@ -3,7 +3,6 @@
import os
import shutil
import subprocess
from typing import Dict, List

import numpy as np
import pandas as pd
@@ -33,7 +32,7 @@
trained_models.update(get_hf_trained_models())

n_experiments = len(trained_models)
results: Dict[str, List] = {
results: dict[str, list] = {
"algo": [],
"env_id": [],
"mean_reward": [],
4 changes: 2 additions & 2 deletions rl_zoo3/callbacks.py
@@ -4,7 +4,7 @@
from copy import deepcopy
from functools import wraps
from threading import Thread
from typing import Optional, Type, Union
from typing import Optional, Union

import optuna
from sb3_contrib import TQC
@@ -119,7 +119,7 @@ def __init__(self, gradient_steps: int = 100, verbose: int = 0, sleep_time: floa
self._model: Union[SAC, TQC]
self.gradient_steps = gradient_steps
self.process: Thread
self.model_class: Union[Type[SAC], Type[TQC]]
self.model_class: Union[type[SAC], type[TQC]]
self.sleep_time = sleep_time

def _init_callback(self) -> None:
48 changes: 24 additions & 24 deletions rl_zoo3/exp_manager.py
@@ -7,7 +7,7 @@
from collections import OrderedDict
from pathlib import Path
from pprint import pprint
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from typing import Any, Callable, Optional, Union

import gymnasium as gym
import numpy as np
@@ -71,9 +71,9 @@ def __init__(
eval_freq: int = 10000,
n_eval_episodes: int = 5,
save_freq: int = -1,
hyperparams: Optional[Dict[str, Any]] = None,
env_kwargs: Optional[Dict[str, Any]] = None,
eval_env_kwargs: Optional[Dict[str, Any]] = None,
hyperparams: Optional[dict[str, Any]] = None,
env_kwargs: Optional[dict[str, Any]] = None,
eval_env_kwargs: Optional[dict[str, Any]] = None,
trained_agent: str = "",
optimize_hyperparameters: bool = False,
storage: Optional[str] = None,
@@ -112,10 +112,10 @@ def __init__(
default_path = Path(__file__).parent.parent

self.config = config or str(default_path / f"hyperparams/{self.algo}.yml")
self.env_kwargs: Dict[str, Any] = env_kwargs or {}
self.env_kwargs: dict[str, Any] = env_kwargs or {}
self.n_timesteps = n_timesteps
self.normalize = False
self.normalize_kwargs: Dict[str, Any] = {}
self.normalize_kwargs: dict[str, Any] = {}
self.env_wrapper: Optional[Callable] = None
self.frame_stack = None
self.seed = seed
@@ -124,23 +124,23 @@ def __init__(
self.vec_env_class = {"dummy": DummyVecEnv, "subproc": SubprocVecEnv}[vec_env_type]
self.vec_env_wrapper: Optional[Callable] = None

self.vec_env_kwargs: Dict[str, Any] = {}
self.vec_env_kwargs: dict[str, Any] = {}
# self.vec_env_kwargs = {} if vec_env_type == "dummy" else {"start_method": "fork"}

# Callbacks
self.specified_callbacks: List = []
self.callbacks: List[BaseCallback] = []
self.specified_callbacks: list = []
self.callbacks: list[BaseCallback] = []
# Use env-kwargs if eval_env_kwargs was not specified
self.eval_env_kwargs: Dict[str, Any] = eval_env_kwargs or self.env_kwargs
self.eval_env_kwargs: dict[str, Any] = eval_env_kwargs or self.env_kwargs
self.save_freq = save_freq
self.eval_freq = eval_freq
self.n_eval_episodes = n_eval_episodes
self.n_eval_envs = n_eval_envs

self.n_envs = 1 # it will be updated when reading hyperparams
self.n_actions = 0 # For DDPG/TD3 action noise objects
self._hyperparams: Dict[str, Any] = {}
self.monitor_kwargs: Dict[str, Any] = {}
self._hyperparams: dict[str, Any] = {}
self.monitor_kwargs: dict[str, Any] = {}

self.trained_agent = trained_agent
self.continue_training = trained_agent.endswith(".zip") and os.path.isfile(trained_agent)
@@ -179,7 +179,7 @@ def __init__(
)
self.params_path = f"{self.save_path}/{self.env_name}"

def setup_experiment(self) -> Optional[Tuple[BaseAlgorithm, Dict[str, Any]]]:
def setup_experiment(self) -> Optional[tuple[BaseAlgorithm, dict[str, Any]]]:
"""
Read hyperparameters, pre-process them (create schedules, wrappers, callbacks, action noise objects)
create the environment and possibly the model.
@@ -223,7 +223,7 @@ def learn(self, model: BaseAlgorithm) -> None:
"""
:param model: an initialized RL model
"""
kwargs: Dict[str, Any] = {}
kwargs: dict[str, Any] = {}
if self.log_interval > -1:
kwargs = {"log_interval": self.log_interval}

@@ -272,7 +272,7 @@ def save_trained_model(self, model: BaseAlgorithm) -> None:
assert vec_normalize is not None
vec_normalize.save(os.path.join(self.params_path, "vecnormalize.pkl"))

def _save_config(self, saved_hyperparams: Dict[str, Any]) -> None:
def _save_config(self, saved_hyperparams: dict[str, Any]) -> None:
"""
Save unprocessed hyperparameters, this can be use later
to reproduce an experiment.
@@ -290,15 +290,15 @@ def _save_config(self, saved_hyperparams: Dict[str, Any]) -> None:

print(f"Log path: {self.save_path}")

def read_hyperparameters(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
def read_hyperparameters(self) -> tuple[dict[str, Any], dict[str, Any]]:
print(f"Loading hyperparameters from: {self.config}")

if self.config.endswith(".yml") or self.config.endswith(".yaml"):
# Load hyperparameters from yaml file
with open(self.config) as f:
hyperparams_dict = yaml.safe_load(f)
elif self.config.endswith(".py"):
global_variables: Dict = {}
global_variables: dict = {}
# Load hyperparameters from python file
exec(Path(self.config).read_text(), global_variables)
hyperparams_dict = global_variables["hyperparams"]
@@ -327,7 +327,7 @@ def read_hyperparameters(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
return hyperparams, saved_hyperparams

@staticmethod
def _preprocess_schedules(hyperparams: Dict[str, Any]) -> Dict[str, Any]:
def _preprocess_schedules(hyperparams: dict[str, Any]) -> dict[str, Any]:
# Create schedules
for key in ["learning_rate", "clip_range", "clip_range_vf", "delta_std"]:
if key not in hyperparams:
@@ -345,7 +345,7 @@ def _preprocess_schedules(hyperparams: Dict[str, Any]) -> Dict[str, Any]:
raise ValueError(f"Invalid value for {key}: {hyperparams[key]}")
return hyperparams

def _preprocess_normalization(self, hyperparams: Dict[str, Any]) -> Dict[str, Any]:
def _preprocess_normalization(self, hyperparams: dict[str, Any]) -> dict[str, Any]:
if "normalize" in hyperparams.keys():
self.normalize = hyperparams["normalize"]

@@ -370,8 +370,8 @@ def _preprocess_normalization(self, hyperparams: Dict[str, Any]) -> Dict[str, An
return hyperparams

def _preprocess_hyperparams( # noqa: C901
self, hyperparams: Dict[str, Any]
) -> Tuple[Dict[str, Any], Optional[Callable], List[BaseCallback], Optional[Callable]]:
self, hyperparams: dict[str, Any]
) -> tuple[dict[str, Any], Optional[Callable], list[BaseCallback], Optional[Callable]]:
self.n_envs = hyperparams.get("n_envs", 1)

if self.verbose > 0:
@@ -448,8 +448,8 @@ def _preprocess_hyperparams(  # noqa: C901
return hyperparams, env_wrapper, callbacks, vec_env_wrapper

def _preprocess_action_noise(
self, hyperparams: Dict[str, Any], saved_hyperparams: Dict[str, Any], env: VecEnv
) -> Dict[str, Any]:
self, hyperparams: dict[str, Any], saved_hyperparams: dict[str, Any], env: VecEnv
) -> dict[str, Any]:
# Parse noise string
# Note: only off-policy algorithms are supported
if hyperparams.get("noise_type") is not None:
@@ -667,7 +667,7 @@ def make_env(**kwargs) -> gym.Env:

return env

def _load_pretrained_agent(self, hyperparams: Dict[str, Any], env: VecEnv) -> BaseAlgorithm:
def _load_pretrained_agent(self, hyperparams: dict[str, Any], env: VecEnv) -> BaseAlgorithm:
# Continue training
print("Loading pretrained agent")
# Policy should not be changed