diff --git a/docs/guide/examples.rst b/docs/guide/examples.rst
index 0d097483f..a4729bfb3 100644
--- a/docs/guide/examples.rst
+++ b/docs/guide/examples.rst
@@ -364,7 +364,7 @@ Atari Games
 
 Training a RL agent on Atari games is straightforward thanks to ``make_atari_env`` helper function.
 It will do `all the preprocessing `_
-and multiprocessing for you. To install the Atari environments, run the command ``pip install gym[atari, accept-rom-license]`` to install the Atari environments and ROMs, or install Stable Baselines3 with ``pip install stable-baselines3[extra]`` to install this and other optional dependencies.
+and multiprocessing for you. To install the Atari environments and ROMs, run the command ``pip install gymnasium[atari,accept-rom-license]``, or install Stable Baselines3 with ``pip install stable-baselines3[extra]`` to install this and other optional dependencies.
 
 .. image:: ../_static/img/colab-badge.svg
    :target: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/sb3/atari_games.ipynb
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
index 25121b263..410a5df0a 100644
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -3,10 +3,16 @@
 Changelog
 ==========
 
-Release 2.2.0a11 (WIP)
+Release 2.2.1 (2023-11-17)
 --------------------------
 **Support for options at reset, bug fixes and better error messages**
 
+.. note::
+
+  SB3 v2.2.0 was yanked after a breaking change was found in `GH#1751 <https://github.com/DLR-RM/stable-baselines3/issues/1751>`_.
+  Please use SB3 v2.2.1 and not v2.2.0.
+
+
 Breaking Changes:
 ^^^^^^^^^^^^^^^^^
 - Switched to ``ruff`` for sorting imports (isort is no longer needed), black and ruff version now require a minimum version
@@ -32,12 +38,24 @@ Bug Fixes:
 - Fixed success reward dtype in ``SimpleMultiObsEnv`` (@NixGD)
 - Fixed check_env for Sequence observation space (@corentinlger)
 - Prevents instantiating BitFlippingEnv with conflicting observation spaces (@kylesayrs)
+- Fixed ``ResourceWarning`` when loading and saving models (files were not closed). Note that only paths are closed automatically,
+  the behavior stays the same for tempfiles (they need to be closed manually),
+  and the behavior is now consistent when loading/saving replay buffers
 
 `SB3-Contrib`_
 ^^^^^^^^^^^^^^
+- Added ``set_options`` for ``AsyncEval``
+- Added ``rollout_buffer_class`` and ``rollout_buffer_kwargs`` arguments to TRPO
 
 `RL Zoo`_
 ^^^^^^^^^
+- Removed ``gym`` dependency; the package is still required for some pretrained agents.
+- Added ``--eval-env-kwargs`` to ``train.py`` (@Quentin18)
+- Added ``ppo_lstm`` to ``hyperparams_opt.py`` (@technocrat13)
+- Upgraded to ``pybullet_envs_gymnasium>=0.4.0``
+- Removed old hacks (for instance limiting off-policy algorithms to one env at test time)
+- Updated Docker image, removed support for X server
+- Replaced deprecated ``optuna.suggest_uniform(...)`` with ``optuna.suggest_float(..., low=..., high=...)``
 
 `SBX`_ (SB3 + Jax)
 ^^^^^^^^^^^^^^^^^^
@@ -66,11 +84,14 @@ Others:
 - Switched to PyTorch 2.1.0 in the CI (fixes type annotations)
 - Fixed ``stable_baselines3/common/policies.py`` type hints
 - Switched to ``mypy`` only for checking types
+- Added tests to check consistency when saving/loading files
 
 Documentation:
 ^^^^^^^^^^^^^^
 - Updated RL Tips and Tricks (include recommendation for evaluation, added links to DroQ, ARS and SBX).
 - Fixed various typos and grammar mistakes
+- Added PokemonRedExperiments to the project page
+- Fixed an out-of-date command for installing Atari in examples
 
 Release 2.1.0 (2023-08-17)
 --------------------------
diff --git a/docs/misc/projects.rst b/docs/misc/projects.rst
index 39803018e..2b2e2405c 100644
--- a/docs/misc/projects.rst
+++ b/docs/misc/projects.rst
@@ -229,3 +229,13 @@ intelligent agents to perform network slice placement.
 | Author: Alex Pasquali
 | Github: https://github.com/AlexPasqua/DeepNetSlice
 | Paper: **under review** (citation instructions on the project's README.md) -> see this Master's Thesis for the moment: https://etd.adm.unipi.it/theses/available/etd-01182023-110038/unrestricted/Tesi_magistrale_Pasquali_Alex.pdf
+
+
+PokemonRedExperiments
+---------------------
+
+Playing Pokemon Red with Reinforcement Learning.
+
+| Author: Peter Whidden
+| Github: https://github.com/PWhiddy/PokemonRedExperiments
+| Video: https://www.youtube.com/watch?v=DcYLT37ImBY
diff --git a/stable_baselines3/common/save_util.py b/stable_baselines3/common/save_util.py
index 332158545..0cbf6d4e2 100644
--- a/stable_baselines3/common/save_util.py
+++ b/stable_baselines3/common/save_util.py
@@ -308,14 +308,14 @@ def save_to_zip_file(
     :param pytorch_variables: Other PyTorch variables expected to contain name and value of the variable.
     :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
     """
-    save_path = open_path(save_path, "w", verbose=0, suffix="zip")
+    file = open_path(save_path, "w", verbose=0, suffix="zip")
     # data/params can be None, so do not
     # try to serialize them blindly
     if data is not None:
         serialized_data = data_to_json(data)
 
     # Create a zip-archive and write our objects there.
-    with zipfile.ZipFile(save_path, mode="w") as archive:
+    with zipfile.ZipFile(file, mode="w") as archive:
         # Do not try to save "None" elements
         if data is not None:
             archive.writestr("data", serialized_data)
@@ -331,6 +331,9 @@
         # Save system info about the current python env
         archive.writestr("system_info.txt", get_system_info(print_info=False)[1])
 
+    if isinstance(save_path, (str, pathlib.Path)):
+        file.close()
+
 
 def save_to_pkl(path: Union[str, pathlib.Path, io.BufferedIOBase], obj: Any, verbose: int = 0) -> None:
     """
@@ -344,10 +347,12 @@ def save_to_pkl(path: Union[str, pathlib.Path, io.BufferedIOBase], obj: Any, ver
     :param obj: The object to save.
     :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
     """
-    with open_path(path, "w", verbose=verbose, suffix="pkl") as file_handler:
-        # Use protocol>=4 to support saving replay buffers >= 4Gb
-        # See https://docs.python.org/3/library/pickle.html
-        pickle.dump(obj, file_handler, protocol=pickle.HIGHEST_PROTOCOL)
+    file = open_path(path, "w", verbose=verbose, suffix="pkl")
+    # Use protocol>=4 to support saving replay buffers >= 4Gb
+    # See https://docs.python.org/3/library/pickle.html
+    pickle.dump(obj, file, protocol=pickle.HIGHEST_PROTOCOL)
+    if isinstance(path, (str, pathlib.Path)):
+        file.close()
 
 
 def load_from_pkl(path: Union[str, pathlib.Path, io.BufferedIOBase], verbose: int = 0) -> Any:
@@ -360,8 +365,11 @@ def load_from_pkl(path: Union[str, pathlib.Path, io.BufferedIOBase], verbose: in
         path actually exists. If path is a io.BufferedIOBase the path exists.
     :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
     """
-    with open_path(path, "r", verbose=verbose, suffix="pkl") as file_handler:
-        return pickle.load(file_handler)
+    file = open_path(path, "r", verbose=verbose, suffix="pkl")
+    obj = pickle.load(file)
+    if isinstance(path, (str, pathlib.Path)):
+        file.close()
+    return obj
 
 
 def load_from_zip_file(
@@ -391,14 +399,14 @@
     :return: Class parameters, model state_dicts (aka "params", dict of state_dict)
         and dict of pytorch variables
     """
-    load_path = open_path(load_path, "r", verbose=verbose, suffix="zip")
+    file = open_path(load_path, "r", verbose=verbose, suffix="zip")
 
     # set device to cpu if cuda is not available
     device = get_device(device=device)
 
     # Open the zip archive and load data
     try:
-        with zipfile.ZipFile(load_path) as archive:
+        with zipfile.ZipFile(file) as archive:
             namelist = archive.namelist()
             # If data or parameters is not in the
             # zip archive, assume they were stored
@@ -450,4 +458,7 @@
     except zipfile.BadZipFile as e:
         # load_path wasn't a zip file
         raise ValueError(f"Error: the file {load_path} wasn't a zip-file") from e
+    finally:
+        if isinstance(load_path, (str, pathlib.Path)):
+            file.close()
     return data, params, pytorch_variables
diff --git a/stable_baselines3/common/vec_env/subproc_vec_env.py b/stable_baselines3/common/vec_env/subproc_vec_env.py
index 83758841b..c598c735a 100644
--- a/stable_baselines3/common/vec_env/subproc_vec_env.py
+++ b/stable_baselines3/common/vec_env/subproc_vec_env.py
@@ -128,14 +128,14 @@ def step_async(self, actions: np.ndarray) -> None:
     def step_wait(self) -> VecEnvStepReturn:
         results = [remote.recv() for remote in self.remotes]
         self.waiting = False
-        obs, rews, dones, infos, self.reset_infos = zip(*results)
-        return _flatten_obs(obs, self.observation_space), np.stack(rews), np.stack(dones), infos
+        obs, rews, dones, infos, self.reset_infos = zip(*results)  # type: ignore[assignment]
+        return _flatten_obs(obs, self.observation_space), np.stack(rews), np.stack(dones), infos  # type: ignore[return-value]
 
     def reset(self) -> VecEnvObs:
         for env_idx, remote in enumerate(self.remotes):
             remote.send(("reset", (self._seeds[env_idx], self._options[env_idx])))
         results = [remote.recv() for remote in self.remotes]
-        obs, self.reset_infos = zip(*results)
+        obs, self.reset_infos = zip(*results)  # type: ignore[assignment]
         # Seeds and options are only used once
         self._reset_seeds()
         self._reset_options()
diff --git a/stable_baselines3/version.txt b/stable_baselines3/version.txt
index 13ce6d730..c043eea77 100644
--- a/stable_baselines3/version.txt
+++ b/stable_baselines3/version.txt
@@ -1 +1 @@
-2.2.0a11
+2.2.1
diff --git a/tests/test_save_load.py b/tests/test_save_load.py
index b574d7456..e7123e984 100644
--- a/tests/test_save_load.py
+++ b/tests/test_save_load.py
@@ -3,6 +3,7 @@
 import json
 import os
 import pathlib
+import tempfile
 import warnings
 import zipfile
 from collections import OrderedDict
@@ -747,3 +748,38 @@ def test_dqn_target_update_interval(tmp_path):
     model = DQN.load(tmp_path / "dqn_cartpole")
     os.remove(tmp_path / "dqn_cartpole.zip")
     assert model.target_update_interval == 100
+
+
+# Turn warnings into errors
+@pytest.mark.filterwarnings("error")
+def test_no_resource_warning(tmp_path):
+    # Check behavior of save/load
+    # see https://github.com/DLR-RM/stable-baselines3/issues/1751
+
+    # check that files are properly closed
+    # Create a PPO agent and save it
+    PPO("MlpPolicy", "CartPole-v1").save(tmp_path / "dqn_cartpole")
+    PPO.load(tmp_path / "dqn_cartpole")
+
+    PPO("MlpPolicy", "CartPole-v1").save(str(tmp_path / "dqn_cartpole"))
+    PPO.load(str(tmp_path / "dqn_cartpole"))
+
+    # Do the same but in memory, should not close the file
+    with tempfile.TemporaryFile() as fp:
+        PPO("MlpPolicy", "CartPole-v1").save(fp)
+        PPO.load(fp)
+        assert not fp.closed
+
+    # Same but with replay buffer
+    model = SAC("MlpPolicy", "Pendulum-v1", buffer_size=200)
+    model.save_replay_buffer(tmp_path / "replay")
+    model.load_replay_buffer(tmp_path / "replay")
+
+    model.save_replay_buffer(str(tmp_path / "replay"))
+    model.load_replay_buffer(str(tmp_path / "replay"))
+
+    with tempfile.TemporaryFile() as fp:
+        model.save_replay_buffer(fp)
+        fp.seek(0)
+        model.load_replay_buffer(fp)
+        assert not fp.closed
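
Below is a minimal usage sketch (illustrative only, not part of the patch) of the file handling described in the changelog entry and exercised by the new test above: paths passed to save/load are now opened and closed by SB3 itself, while already-open file objects are left open for the caller to close. It assumes SB3 v2.2.1; the file names are placeholders.

import tempfile

from stable_baselines3 import PPO, SAC

model = PPO("MlpPolicy", "CartPole-v1")

# Saving/loading via a path (str or pathlib.Path): SB3 opens and closes the
# file itself, so no ResourceWarning is emitted anymore.
model.save("ppo_cartpole")
model = PPO.load("ppo_cartpole")

# Saving/loading via an already-open file object: SB3 does not close it;
# closing stays the caller's responsibility (here, the context manager).
with tempfile.TemporaryFile() as fp:
    model.save(fp)
    fp.seek(0)
    model = PPO.load(fp)
    assert not fp.closed

# The same rule now applies to replay buffers (pickle files).
sac = SAC("MlpPolicy", "Pendulum-v1", buffer_size=200)
sac.save_replay_buffer("replay_buffer")  # path: closed automatically
with tempfile.TemporaryFile() as fp:
    sac.save_replay_buffer(fp)  # file object: left open
    fp.seek(0)
    sac.load_replay_buffer(fp)
    assert not fp.closed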