diff --git a/docs/guide/examples.rst b/docs/guide/examples.rst
index 0d097483f..a4729bfb3 100644
--- a/docs/guide/examples.rst
+++ b/docs/guide/examples.rst
@@ -364,7 +364,7 @@ Atari Games
Training a RL agent on Atari games is straightforward thanks to ``make_atari_env`` helper function.
It will do `all the preprocessing `_
-and multiprocessing for you. To install the Atari environments, run the command ``pip install gym[atari, accept-rom-license]`` to install the Atari environments and ROMs, or install Stable Baselines3 with ``pip install stable-baselines3[extra]`` to install this and other optional dependencies.
+and multiprocessing for you. To install the Atari environments and ROMs, run ``pip install gymnasium[atari,accept-rom-license]``, or install Stable Baselines3 with ``pip install stable-baselines3[extra]``, which includes these and other optional dependencies.
.. image:: ../_static/img/colab-badge.svg
:target: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/sb3/atari_games.ipynb
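A minimal sketch of the workflow described in the paragraph above, assuming the standard ``make_atari_env`` helper together with ``VecFrameStack`` and ``A2C`` (any Atari-capable algorithm would do):

.. code-block:: python

    from stable_baselines3 import A2C
    from stable_baselines3.common.env_util import make_atari_env
    from stable_baselines3.common.vec_env import VecFrameStack

    # make_atari_env applies the standard Atari preprocessing wrappers
    # and runs several environments in parallel
    vec_env = make_atari_env("BreakoutNoFrameskip-v4", n_envs=4, seed=0)
    # Stack 4 consecutive frames so the policy can infer motion
    vec_env = VecFrameStack(vec_env, n_stack=4)

    model = A2C("CnnPolicy", vec_env, verbose=1)
    model.learn(total_timesteps=10_000)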
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
index 25121b263..410a5df0a 100644
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -3,10 +3,16 @@
Changelog
==========
-Release 2.2.0a11 (WIP)
+Release 2.2.1 (2023-11-17)
--------------------------
**Support for options at reset, bug fixes and better error messages**
+.. note::
+
+ SB3 v2.2.0 was yanked after a breaking change was found in `GH#1751 <https://github.com/DLR-RM/stable-baselines3/issues/1751>`_.
+ Please use SB3 v2.2.1 and not v2.2.0.
+
+
Breaking Changes:
^^^^^^^^^^^^^^^^^
- Switched to ``ruff`` for sorting imports (isort is no longer needed); a minimum version of black and ruff is now required
@@ -32,12 +38,24 @@ Bug Fixes:
- Fixed success reward dtype in ``SimpleMultiObsEnv`` (@NixGD)
- Fixed check_env for Sequence observation space (@corentinlger)
- Prevents instantiating BitFlippingEnv with conflicting observation spaces (@kylesayrs)
+- Fixed ``ResourceWarning`` when loading and saving models (files were not closed). Please note that only paths are closed automatically;
+  the behavior stays the same for file objects such as tempfiles (they still need to be closed manually),
+  and loading/saving the replay buffer is now consistent with this behavior (see the sketch after this changelog excerpt)
`SB3-Contrib`_
^^^^^^^^^^^^^^
+- Added ``set_options`` for ``AsyncEval``
+- Added ``rollout_buffer_class`` and ``rollout_buffer_kwargs`` arguments to TRPO
`RL Zoo`_
^^^^^^^^^
+- Removed the `gym` dependency; the package is still required for some pretrained agents.
+- Added `--eval-env-kwargs` to `train.py` (@Quentin18)
+- Added `ppo_lstm` to hyperparams_opt.py (@technocrat13)
+- Upgraded to `pybullet_envs_gymnasium>=0.4.0`
+- Removed old hacks (for instance, limiting off-policy algorithms to one env at test time)
+- Updated docker image, removed support for X server
+- Replaced deprecated `optuna.suggest_uniform(...)` by `optuna.suggest_float(..., low=..., high=...)`
`SBX`_ (SB3 + Jax)
^^^^^^^^^^^^^^^^^^
@@ -66,11 +84,14 @@ Others:
- Switched to PyTorch 2.1.0 in the CI (fixes type annotations)
- Fixed ``stable_baselines3/common/policies.py`` type hints
- Switched to ``mypy`` only for checking types
+- Added tests to check consistency when saving/loading files
Documentation:
^^^^^^^^^^^^^^
- Updated RL Tips and Tricks (included recommendations for evaluation, added links to DroQ, ARS and SBX).
- Fixed various typos and grammar mistakes
+- Added PokemonRedExperiments to the project page
+- Fixed an out-of-date command for installing Atari in examples
Release 2.1.0 (2023-08-17)
--------------------------
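To illustrate the ``ResourceWarning`` fix listed above (a sketch only, using an arbitrary ``ppo_cartpole`` filename): when a path is given, SB3 now opens and closes the file itself; when a file object is given, closing remains the caller's responsibility.

.. code-block:: python

    import tempfile

    from stable_baselines3 import PPO

    model = PPO("MlpPolicy", "CartPole-v1")

    # Path input: SB3 opens the file and now closes it itself (no ResourceWarning)
    model.save("ppo_cartpole")
    model = PPO.load("ppo_cartpole")

    # File-like input: the handle stays open and must be closed by the caller
    with tempfile.TemporaryFile() as fp:
        model.save(fp)
        fp.seek(0)
        model = PPO.load(fp)
        assert not fp.closed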
diff --git a/docs/misc/projects.rst b/docs/misc/projects.rst
index 39803018e..2b2e2405c 100644
--- a/docs/misc/projects.rst
+++ b/docs/misc/projects.rst
@@ -229,3 +229,13 @@ intelligent agents to perform network slice placement.
| Author: Alex Pasquali
| Github: https://github.com/AlexPasqua/DeepNetSlice
| Paper: **under review** (citation instructions on the project's README.md) -> see this Master's Thesis for the moment: https://etd.adm.unipi.it/theses/available/etd-01182023-110038/unrestricted/Tesi_magistrale_Pasquali_Alex.pdf
+
+
+PokemonRedExperiments
+---------------------
+
+Playing Pokemon Red with Reinforcement Learning.
+
+| Author: Peter Whidden
+| Github: https://github.com/PWhiddy/PokemonRedExperiments
+| Video: https://www.youtube.com/watch?v=DcYLT37ImBY
diff --git a/stable_baselines3/common/save_util.py b/stable_baselines3/common/save_util.py
index 332158545..0cbf6d4e2 100644
--- a/stable_baselines3/common/save_util.py
+++ b/stable_baselines3/common/save_util.py
@@ -308,14 +308,14 @@ def save_to_zip_file(
:param pytorch_variables: Other PyTorch variables expected to contain name and value of the variable.
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
"""
- save_path = open_path(save_path, "w", verbose=0, suffix="zip")
+ file = open_path(save_path, "w", verbose=0, suffix="zip")
# data/params can be None, so do not
# try to serialize them blindly
if data is not None:
serialized_data = data_to_json(data)
# Create a zip-archive and write our objects there.
- with zipfile.ZipFile(save_path, mode="w") as archive:
+ with zipfile.ZipFile(file, mode="w") as archive:
# Do not try to save "None" elements
if data is not None:
archive.writestr("data", serialized_data)
@@ -331,6 +331,9 @@ def save_to_zip_file(
# Save system info about the current python env
archive.writestr("system_info.txt", get_system_info(print_info=False)[1])
+ if isinstance(save_path, (str, pathlib.Path)):
+ file.close()
+
def save_to_pkl(path: Union[str, pathlib.Path, io.BufferedIOBase], obj: Any, verbose: int = 0) -> None:
"""
@@ -344,10 +347,12 @@ def save_to_pkl(path: Union[str, pathlib.Path, io.BufferedIOBase], obj: Any, ver
:param obj: The object to save.
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
"""
- with open_path(path, "w", verbose=verbose, suffix="pkl") as file_handler:
- # Use protocol>=4 to support saving replay buffers >= 4Gb
- # See https://docs.python.org/3/library/pickle.html
- pickle.dump(obj, file_handler, protocol=pickle.HIGHEST_PROTOCOL)
+ file = open_path(path, "w", verbose=verbose, suffix="pkl")
+ # Use protocol>=4 to support saving replay buffers >= 4Gb
+ # See https://docs.python.org/3/library/pickle.html
+ pickle.dump(obj, file, protocol=pickle.HIGHEST_PROTOCOL)
+ if isinstance(path, (str, pathlib.Path)):
+ file.close()
def load_from_pkl(path: Union[str, pathlib.Path, io.BufferedIOBase], verbose: int = 0) -> Any:
@@ -360,8 +365,11 @@ def load_from_pkl(path: Union[str, pathlib.Path, io.BufferedIOBase], verbose: in
path actually exists. If path is a io.BufferedIOBase the path exists.
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
"""
- with open_path(path, "r", verbose=verbose, suffix="pkl") as file_handler:
- return pickle.load(file_handler)
+ file = open_path(path, "r", verbose=verbose, suffix="pkl")
+ obj = pickle.load(file)
+ if isinstance(path, (str, pathlib.Path)):
+ file.close()
+ return obj
def load_from_zip_file(
@@ -391,14 +399,14 @@ def load_from_zip_file(
:return: Class parameters, model state_dicts (aka "params", dict of state_dict)
and dict of pytorch variables
"""
- load_path = open_path(load_path, "r", verbose=verbose, suffix="zip")
+ file = open_path(load_path, "r", verbose=verbose, suffix="zip")
# set device to cpu if cuda is not available
device = get_device(device=device)
# Open the zip archive and load data
try:
- with zipfile.ZipFile(load_path) as archive:
+ with zipfile.ZipFile(file) as archive:
namelist = archive.namelist()
# If data or parameters is not in the
# zip archive, assume they were stored
@@ -450,4 +458,7 @@ def load_from_zip_file(
except zipfile.BadZipFile as e:
# load_path wasn't a zip file
raise ValueError(f"Error: the file {load_path} wasn't a zip-file") from e
+ finally:
+ if isinstance(load_path, (str, pathlib.Path)):
+ file.close()
return data, params, pytorch_variables
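A small sketch of the convention these ``save_util`` changes establish, assuming an in-memory ``io.BytesIO`` buffer for the file-like case and a hypothetical ``example.pkl`` path: handles opened by ``open_path`` from a ``str`` or ``pathlib.Path`` are closed automatically, while caller-supplied handles are left open.

.. code-block:: python

    import io
    import pathlib

    from stable_baselines3.common.save_util import load_from_pkl, save_to_pkl

    data = {"timesteps": 1000}

    # Path input: save_to_pkl/load_from_pkl open and close the file themselves
    path = pathlib.Path("example.pkl")
    save_to_pkl(path, data)
    assert load_from_pkl(path) == data

    # File-like input: the buffer is left open for the caller to manage
    buffer = io.BytesIO()
    save_to_pkl(buffer, data)
    buffer.seek(0)
    assert load_from_pkl(buffer) == data
    assert not buffer.closed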
diff --git a/stable_baselines3/common/vec_env/subproc_vec_env.py b/stable_baselines3/common/vec_env/subproc_vec_env.py
index 83758841b..c598c735a 100644
--- a/stable_baselines3/common/vec_env/subproc_vec_env.py
+++ b/stable_baselines3/common/vec_env/subproc_vec_env.py
@@ -128,14 +128,14 @@ def step_async(self, actions: np.ndarray) -> None:
def step_wait(self) -> VecEnvStepReturn:
results = [remote.recv() for remote in self.remotes]
self.waiting = False
- obs, rews, dones, infos, self.reset_infos = zip(*results)
- return _flatten_obs(obs, self.observation_space), np.stack(rews), np.stack(dones), infos
+ obs, rews, dones, infos, self.reset_infos = zip(*results) # type: ignore[assignment]
+ return _flatten_obs(obs, self.observation_space), np.stack(rews), np.stack(dones), infos # type: ignore[return-value]
def reset(self) -> VecEnvObs:
for env_idx, remote in enumerate(self.remotes):
remote.send(("reset", (self._seeds[env_idx], self._options[env_idx])))
results = [remote.recv() for remote in self.remotes]
- obs, self.reset_infos = zip(*results)
+ obs, self.reset_infos = zip(*results) # type: ignore[assignment]
# Seeds and options are only used once
self._reset_seeds()
self._reset_options()
diff --git a/stable_baselines3/version.txt b/stable_baselines3/version.txt
index 13ce6d730..c043eea77 100644
--- a/stable_baselines3/version.txt
+++ b/stable_baselines3/version.txt
@@ -1 +1 @@
-2.2.0a11
+2.2.1
diff --git a/tests/test_save_load.py b/tests/test_save_load.py
index b574d7456..e7123e984 100644
--- a/tests/test_save_load.py
+++ b/tests/test_save_load.py
@@ -3,6 +3,7 @@
import json
import os
import pathlib
+import tempfile
import warnings
import zipfile
from collections import OrderedDict
@@ -747,3 +748,38 @@ def test_dqn_target_update_interval(tmp_path):
model = DQN.load(tmp_path / "dqn_cartpole")
os.remove(tmp_path / "dqn_cartpole.zip")
assert model.target_update_interval == 100
+
+
+# Turn warnings into errors
+@pytest.mark.filterwarnings("error")
+def test_no_resource_warning(tmp_path):
+ # Check behavior of save/load
+ # see https://github.com/DLR-RM/stable-baselines3/issues/1751
+
+ # check that files are properly closed
+ # Create a PPO agent and save it
+ PPO("MlpPolicy", "CartPole-v1").save(tmp_path / "dqn_cartpole")
+ PPO.load(tmp_path / "dqn_cartpole")
+
+ PPO("MlpPolicy", "CartPole-v1").save(str(tmp_path / "dqn_cartpole"))
+ PPO.load(str(tmp_path / "dqn_cartpole"))
+
+ # Do the same but in memory, should not close the file
+ with tempfile.TemporaryFile() as fp:
+ PPO("MlpPolicy", "CartPole-v1").save(fp)
+ PPO.load(fp)
+ assert not fp.closed
+
+ # Same but with replay buffer
+ model = SAC("MlpPolicy", "Pendulum-v1", buffer_size=200)
+ model.save_replay_buffer(tmp_path / "replay")
+ model.load_replay_buffer(tmp_path / "replay")
+
+ model.save_replay_buffer(str(tmp_path / "replay"))
+ model.load_replay_buffer(str(tmp_path / "replay"))
+
+ with tempfile.TemporaryFile() as fp:
+ model.save_replay_buffer(fp)
+ fp.seek(0)
+ model.load_replay_buffer(fp)
+ assert not fp.closed