Fix tests for mps support #1

Open · wants to merge 50 commits into base: master
Changes from 36 commits
Commits (50)
ace0516 Use MPS device when available (araffin, Jul 4, 2022)
9ac6225 Merge branch 'master' into feat/mps-support (araffin, Aug 13, 2022)
2dcbef9 Update test (araffin, Aug 13, 2022)
b00ca7f Merge branch 'master' into feat/mps-support (araffin, Aug 16, 2022)
06a2124 Merge branch 'master' into feat/mps-support (qgallouedec, Sep 28, 2022)
6d868c0 Merge branch 'master' into feat/mps-support (qgallouedec, Oct 4, 2022)
8d79e96 Merge branch 'master' into feat/mps-support (qgallouedec, Oct 7, 2022)
3276cb0 Merge branch 'master' into feat/mps-support (qgallouedec, Oct 10, 2022)
f4f6073 Merge branch 'master' into feat/mps-support (qgallouedec, Oct 14, 2022)
64327c7 Merge branch 'master' into feat/mps-support (qgallouedec, Oct 17, 2022)
0344c3c Merge branch 'master' into feat/mps-support (araffin, Oct 24, 2022)
fa196ab Merge branch 'master' into feat/mps-support (qgallouedec, Nov 2, 2022)
efd086e Merge branch 'master' into feat/mps-support (araffin, Nov 18, 2022)
7f11843 Merge branch 'master' into feat/mps-support (qgallouedec, Dec 7, 2022)
c60f681 Merge branch 'master' into feat/mps-support (qgallouedec, Dec 20, 2022)
92e8d11 Merge branch 'master' into feat/mps-support (araffin, Jan 13, 2023)
b235c8e Merge branch 'master' into feat/mps-support (qgallouedec, Feb 14, 2023)
d4d0536 Merge branch 'master' into feat/mps-support (araffin, Apr 3, 2023)
0311b62 Merge branch 'master' into feat/mps-support (araffin, Apr 21, 2023)
086f79a Merge branch 'master' into feat/mps-support (araffin, May 3, 2023)
fe606fc Merge branch 'master' into feat/mps-support (araffin, May 24, 2023)
34f4819 Merge branch 'master' into feat/mps-support (qgallouedec, Jun 30, 2023)
ef39571 Merge branch 'master' into feat/mps-support (araffin, Aug 17, 2023)
d26324c Merge branch 'master' into feat/mps-support (araffin, Aug 30, 2023)
1e5dc90 Merge branch 'master' into feat/mps-support (araffin, Oct 6, 2023)
40ed03c mps.is_available -> mps.is_built (qgallouedec, Oct 6, 2023)
e83924b docstring (qgallouedec, Oct 6, 2023)
b707480 Merge branch 'master' into feat/mps-support (qgallouedec, Nov 2, 2023)
81e3c63 Merge branch 'master' into feat/mps-support (araffin, Nov 16, 2023)
f0e54a7 Merge branch 'master' into feat/mps-support (araffin, Jan 10, 2024)
d47c586 Merge branch 'master' into feat/mps-support (araffin, Apr 18, 2024)
b85a2a5 Fix warning (araffin, Apr 18, 2024)
1c25053 Fix tests (deathcoder, Sep 14, 2024)
f822ef5 Attempt fix ci: only cast reward from float64 to float32 (deathcoder, Sep 17, 2024)
1ac4a60 allow running workflows from ui (deathcoder, Sep 17, 2024)
9970f51 Merge pull request #2 from deathcoder/attempt-fix-ci (deathcoder, Sep 17, 2024)
955382e Merge branch 'master' into feat/mps-support (araffin, Sep 18, 2024)
56c153f Add warning when using PPO on GPU and update doc (#2017) (Dev1nW, Oct 7, 2024)
3d59b5c Use uv on GitHub CI for faster download and update changelog (#2026) (araffin, Oct 24, 2024)
dd3d0ac Update readme and clarify planned features (#2030) (araffin, Oct 29, 2024)
5e7372d Merge branch 'feat/mps-support' into feat/mps-support (araffin, Oct 29, 2024)
263e657 Merge branch 'master' into feat/mps-support (araffin, Oct 29, 2024)
8f0b488 Update Gymnasium to v1.0.0 (#1837) (pseudo-rnd-thoughts, Nov 4, 2024)
e4f4f12 Add note about SAC ent coeff optimization (#2037) (araffin, Nov 8, 2024)
7c71688 Merge branch 'master' into feat/mps-support (araffin, Nov 8, 2024)
4c03a25 Merge remote-tracking branch 'origin/feat/mps-support' into feat/mps-… (araffin, Nov 8, 2024)
020ee42 Release 2.4.0 (#2040) (araffin, Nov 18, 2024)
daaebd0 Drop python 3.8 and add python 3.12 support (#2041) (araffin, Nov 18, 2024)
9489b1a Merge branch 'master' into feat/mps-support (araffin, Nov 18, 2024)
0ec37d8 Merge branch 'feat/mps-support' into feat/mps-support (araffin, Nov 18, 2024)
72 changes: 36 additions & 36 deletions .github/workflows/ci.yml
@@ -5,10 +5,10 @@ name: CI

 on:
   push:
-    branches: [ master ]
+    branches: [master]
   pull_request:
-    branches: [ master ]
-
+    branches: [master]
+  workflow_dispatch:
 jobs:
   build:
     env:
@@ -23,38 +23,38 @@ jobs:
         python-version: ["3.8", "3.9", "3.10", "3.11"]

     steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        # cpu version of pytorch
-        pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu
-
-        # Install Atari Roms
-        pip install autorom
-        wget https://gist.githubusercontent.com/jjshoots/61b22aefce4456920ba99f2c36906eda/raw/00046ac3403768bfe45857610a3d333b8e35e026/Roms.tar.gz.b64
-        base64 Roms.tar.gz.b64 --decode &> Roms.tar.gz
-        AutoROM --accept-license --source-file Roms.tar.gz
-
-        pip install .[extra_no_roms,tests,docs]
-        # Use headless version
-        pip install opencv-python-headless
-    - name: Lint with ruff
-      run: |
-        make lint
-    - name: Build the doc
-      run: |
-        make doc
-    - name: Check codestyle
-      run: |
-        make check-codestyle
-    - name: Type check
-      run: |
-        make type
-    - name: Test with pytest
-      run: |
-        make pytest
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          # cpu version of pytorch
+          pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu
+
+          # Install Atari Roms
+          pip install autorom
+          wget https://gist.githubusercontent.com/jjshoots/61b22aefce4456920ba99f2c36906eda/raw/00046ac3403768bfe45857610a3d333b8e35e026/Roms.tar.gz.b64
+          base64 Roms.tar.gz.b64 --decode &> Roms.tar.gz
+          AutoROM --accept-license --source-file Roms.tar.gz
+
+          pip install .[extra_no_roms,tests,docs]
Quote the pip install argument to prevent shell errors

The command

pip install .[extra_no_roms,tests,docs]

may be misinterpreted by the shell, which can treat the square brackets as pattern characters for filename expansion (globbing); in zsh, for example, an unmatched glob aborts the command with "no matches found". To prevent such errors, quote the argument:

-pip install .[extra_no_roms,tests,docs]
+pip install '.[extra_no_roms,tests,docs]'
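For illustration, a small sketch using Python's fnmatch module (which follows the same glob rules most shells use): the bracketed extras form a character class, so the unquoted argument means "." followed by exactly one character from the set, not the literal string.

    import fnmatch

    # Hypothetical illustration: the unquoted argument is a glob pattern,
    # "." followed by ONE character from {e, x, t, r, a, _, n, o, m, s, ",", d, c, g}.
    pattern = ".[extra_no_roms,tests,docs]"
    print(fnmatch.fnmatch(".e", pattern))     # True: a file named ".e" would be expanded instead
    print(fnmatch.fnmatch(pattern, pattern))  # False: the literal argument itself does not match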

+          # Use headless version
+          pip install opencv-python-headless
+      - name: Lint with ruff
+        run: |
+          make lint
+      - name: Build the doc
+        run: |
+          make doc
+      - name: Check codestyle
+        run: |
+          make check-codestyle
+      - name: Type check
+        run: |
+          make type
+      - name: Test with pytest
+        run: |
+          make pytest
2 changes: 2 additions & 0 deletions docs/misc/changelog.rst
@@ -558,6 +558,7 @@ New Features:
 - Added checkpoints for replay buffer and ``VecNormalize`` statistics (@anand-bala)
 - Added option for ``Monitor`` to append to existing file instead of overriding (@sidney-tio)
 - The env checker now raises an error when using dict observation spaces and observation keys don't match observation space keys
+- Use MacOS Metal "mps" device when available

`SB3-Contrib`_
^^^^^^^^^^^^^^
@@ -615,6 +616,7 @@ Breaking Changes:

New Features:
^^^^^^^^^^^^^
+- Save cloudpickle version


`SB3-Contrib`_
2 changes: 2 additions & 0 deletions stable_baselines3/common/buffers.py
@@ -135,6 +135,8 @@ def to_torch(self, array: np.ndarray, copy: bool = True) -> th.Tensor:
         :return:
         """
         if copy:
+            if hasattr(th, "backends") and th.backends.mps.is_built():
+                return th.tensor(array, dtype=th.float32, device=self.device)
Comment on lines +139 to +140
🛠️ Refactor suggestion

Modify the MPS check in the to_torch method for better compatibility

Consider checking self.device.type == "mps" instead of hasattr(th, "backends") and th.backends.mps.is_built(): the device type reflects where the tensor will actually be placed, and it behaves consistently across PyTorch versions.

Apply this diff to modify the condition:

 def to_torch(self, array: np.ndarray, copy: bool = True) -> th.Tensor:
     if copy:
-        if hasattr(th, "backends") and th.backends.mps.is_built():
+        if self.device.type == "mps":
             return th.tensor(array, dtype=th.float32, device=self.device)
         return th.tensor(array, device=self.device)
     return th.as_tensor(array, device=self.device)
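For background, a minimal sketch of the limitation driving this cast (assuming an Apple-Silicon machine with a PyTorch build that includes MPS):

    import numpy as np
    import torch as th

    arr = np.zeros(3, dtype=np.float64)
    if th.backends.mps.is_available():
        # th.tensor(arr, device="mps") would raise: the MPS framework does not
        # support float64, hence the explicit float32 cast in to_torch above.
        t = th.tensor(arr, dtype=th.float32, device="mps")
        print(t.dtype)  # torch.float32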

4 changes: 2 additions & 2 deletions stable_baselines3/common/envs/bit_flipping_env.py
@@ -78,11 +78,11 @@ def convert_if_needed(self, state: np.ndarray) -> Union[int, np.ndarray]:
         if self.discrete_obs_space:
             # The internal state is the binary representation of the
             # observed one
-            return int(sum(state[i] * 2**i for i in range(len(state))))
+            return int(sum(int(state[i]) * 2**i for i in range(len(state))))
Simplify state conversion using NumPy vectorization

Currently, the state conversion uses a generator expression with an explicit Python loop and per-element integer casting:

return int(sum(int(state[i]) * 2**i for i in range(len(state))))

This can be simplified and optimized by leveraging NumPy's vectorized operations, which are more efficient and concise. Consider rewriting the code using np.dot:

Apply this diff to simplify the code:

- return int(sum(int(state[i]) * 2**i for i in range(len(state))))
+ return int(state.dot(2 ** np.arange(len(state))))

This approach eliminates the explicit loop and casting, improving performance and readability.
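A quick sanity check with a made-up bit vector (not from the PR) confirming both forms agree:

    import numpy as np

    state = np.array([1, 0, 1, 1], dtype=np.float32)  # hypothetical 4-bit state
    loop_result = int(sum(int(state[i]) * 2**i for i in range(len(state))))
    vec_result = int(state.dot(2 ** np.arange(len(state))))
    assert loop_result == vec_result == 13  # 1*1 + 0*2 + 1*4 + 1*8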


         if self.image_obs_space:
             size = np.prod(self.image_shape)
-            image = np.concatenate((state * 255, np.zeros(size - len(state), dtype=np.uint8)))
+            image = np.concatenate((state.astype(np.uint8) * 255, np.zeros(size - len(state), dtype=np.uint8)))
Optimize image creation by preallocating the array

Instead of concatenating arrays to create the image, you can preallocate the array and assign values directly. This avoids unnecessary memory allocation and improves performance:

Apply this diff to optimize the code:

- image = np.concatenate((state.astype(np.uint8) * 255, np.zeros(size - len(state), dtype=np.uint8)))
+ image = np.zeros(size, dtype=np.uint8)
+ image[:len(state)] = state.astype(np.uint8) * 255

This refactored code preallocates a zero-filled array of the required size and assigns the scaled state values directly to the beginning of the array.
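A small check with hypothetical values that the two constructions produce identical arrays:

    import numpy as np

    state = np.array([1, 0, 1], dtype=np.float32)  # hypothetical state
    size = 36  # hypothetical flattened image size
    concatenated = np.concatenate((state.astype(np.uint8) * 255, np.zeros(size - len(state), dtype=np.uint8)))
    preallocated = np.zeros(size, dtype=np.uint8)
    preallocated[: len(state)] = state.astype(np.uint8) * 255
    assert np.array_equal(concatenated, preallocated)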


             return image.reshape(self.image_shape).astype(np.uint8)
         return state

36 changes: 27 additions & 9 deletions stable_baselines3/common/utils.py
@@ -29,8 +29,8 @@ def set_random_seed(seed: int, using_cuda: bool = False) -> None:
     """
     Seed the different random generators.

-    :param seed:
-    :param using_cuda:
+    :param seed: Seed
+    :param using_cuda: Whether CUDA is currently used
     """
     # Seed python RNG
     random.seed(seed)
@@ -138,19 +138,20 @@ def get_device(device: Union[th.device, str] = "auto") -> th.device:
     """
     Retrieve PyTorch device.
     It checks that the requested device is available first.
-    For now, it supports only cpu and cuda.
-    By default, it tries to use the gpu.
+    For now, it supports only CPU and CUDA.
+    By default, it tries to use the GPU.

-    :param device: One for 'auto', 'cuda', 'cpu'
+    :param device: One of "auto", "cuda", "cpu",
+        or any PyTorch supported device (for instance "mps")
Comment on lines +144 to +148

Update docstring to reflect MPS support

The docstring states that the function supports only CPU and CUDA devices, but with the addition of MPS support, this is no longer accurate. Please update the docstring to include MPS and reflect the current functionality.

     :return: Supported Pytorch device
     """
-    # Cuda by default
+    # MPS/CUDA by default
     if device == "auto":
-        device = "cuda"
+        device = get_available_accelerator()
     # Force conversion to th.device
     device = th.device(device)

-    # Cuda not available
+    # CUDA not available
Comment on lines +151 to +157
🛠️ Refactor suggestion

Enhance device handling logic.

The current implementation could be improved to handle device availability more robustly:

  1. Add explicit MPS availability check
  2. Consider handling potential MPS-related errors

Apply this diff to improve the implementation:

     # MPS/CUDA by default
     if device == "auto":
         device = get_available_accelerator()
     # Force conversion to th.device
     device = th.device(device)
 
-    # CUDA not available
-    if device.type == th.device("cuda").type and not th.cuda.is_available():
+    # Check device availability
+    if device.type == "cuda" and not th.cuda.is_available():
+        return th.device("cpu")
+    elif device.type == "mps":
+        try:
+            if not (hasattr(th, "backends") and th.backends.mps.is_built() and th.backends.mps.is_available()):
+                return th.device("cpu")
+        except:
+            return th.device("cpu")
     if device.type == th.device("cuda").type and not th.cuda.is_available():
         return th.device("cpu")
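A usage sketch of the resulting behaviour (assuming this branch is installed): "auto" now resolves through get_available_accelerator, and requesting an unavailable device falls back to CPU.

    from stable_baselines3.common.utils import get_device

    print(get_device("auto"))  # mps, cuda, or cpu depending on the host
    print(get_device("cuda"))  # th.device("cpu") when no CUDA device is present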

@@ -482,6 +483,8 @@ def obs_as_tensor(obs: Union[np.ndarray, Dict[str, np.ndarray]], device: th.device)
     if isinstance(obs, np.ndarray):
         return th.as_tensor(obs, device=device)
     elif isinstance(obs, dict):
+        if hasattr(th, "backends") and th.backends.mps.is_built():
+            return {key: th.as_tensor(_obs, dtype=th.float32, device=device) for (key, _obs) in obs.items()}
Comment on lines +489 to +490
Simplify MPS handling in obs_as_tensor

The current implementation introduces code duplication when handling observations for MPS devices. Consider refactoring to streamline the code and ensure compatibility with different PyTorch versions.

Apply this diff to simplify the code:

    elif isinstance(obs, dict):
-       if hasattr(th, "backends") and th.backends.mps.is_built():
-           return {key: th.as_tensor(_obs, dtype=th.float32, device=device) for (key, _obs) in obs.items()}
-       return {key: th.as_tensor(_obs, device=device) for (key, _obs) in obs.items()}
+       tensor_kwargs = {'device': device}
+       if device.type == 'mps':
+           tensor_kwargs['dtype'] = th.float32
+       return {key: th.as_tensor(_obs, **tensor_kwargs) for (key, _obs) in obs.items()}

This refactor reduces code duplication and uses the device type to handle dtype settings appropriately.
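A tiny illustration of the kwargs pattern with a made-up dict observation (values here are hypothetical):

    import numpy as np
    import torch as th

    obs = {"pos": np.zeros(2, dtype=np.float64)}  # hypothetical dict observation
    device = th.device("cpu")
    tensor_kwargs = {"device": device}
    if device.type == "mps":
        tensor_kwargs["dtype"] = th.float32  # only force float32 on MPS
    tensors = {k: th.as_tensor(v, **tensor_kwargs) for k, v in obs.items()}
    print(tensors["pos"].dtype)  # float64 on cpu; float32 would be forced on mps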

         return {key: th.as_tensor(_obs, device=device) for (key, _obs) in obs.items()}
     else:
         raise Exception(f"Unrecognized type of observation {type(obs)}")
@@ -515,6 +518,21 @@ def should_collect_more_steps(
     )


+def get_available_accelerator() -> str:
+    """
+    Return the available accelerator
+    (currently checking only for CUDA and MPS device)
+    """
+    if hasattr(th, "backends") and th.backends.mps.is_built():
+        # MacOS Metal GPU
+        th.set_default_dtype(th.float32)
+        return "mps"
+    elif th.cuda.is_available():
+        return "cuda"
+    else:
+        return "cpu"
+
Comment on lines +524 to +537
⚠️ Potential issue

Improve accelerator detection implementation.

The current implementation has several issues:

  1. Setting global dtype could have unintended side effects
  2. Missing proper MPS availability check
  3. Missing error handling
  4. Docstring needs improvement

Apply this diff to fix these issues:

 def get_available_accelerator() -> str:
     """
     Return the available accelerator
-    (currently checking only for CUDA and MPS device)
+    (checking for CUDA and MPS devices)
+    
+    Note: MPS (Apple Metal) requires float32 dtype and may not be available
+    on older macOS versions even if built.
+    
+    Returns:
+        str: The available accelerator type ('mps', 'cuda', or 'cpu')
     """
-    if hasattr(th, "backends") and th.backends.mps.is_built():
-        # MacOS Metal GPU
-        th.set_default_dtype(th.float32)
-        return "mps"
+    try:
+        if (hasattr(th, "backends") and th.backends.mps.is_built()
+                and th.backends.mps.is_available()):
+            # MacOS Metal GPU
+            return "mps"
+    except Exception:  # Catch any MPS-related errors
+        pass
-    elif th.cuda.is_available():
+    if th.cuda.is_available():
         return "cuda"
-    else:
-        return "cpu"
+    return "cpu"


 def get_system_info(print_info: bool = True) -> Tuple[Dict[str, str], str]:
     """
     Retrieve system and python env info for the current system.

@@ -530,7 +548,7 @@ def get_system_info(print_info: bool = True) -> Tuple[Dict[str, str], str]:
"Python": platform.python_version(),
"Stable-Baselines3": sb3.__version__,
"PyTorch": th.__version__,
"GPU Enabled": str(th.cuda.is_available()),
"Accelerator": get_available_accelerator(),
"Numpy": np.__version__,
"Cloudpickle": cloudpickle.__version__,
"Gymnasium": gym.__version__,
17 changes: 16 additions & 1 deletion stable_baselines3/common/vec_env/vec_normalize.py
@@ -125,6 +125,20 @@ def _sanity_checks(self) -> None:
                 f"not {self.observation_space}"
             )

+    @staticmethod
+    def _maybe_cast_reward(reward: np.ndarray) -> np.ndarray:
+        """
+        Cast `np.float64` reward datatype to `np.float32`,
+        keep the others dtype unchanged.
+
+        :param dtype: The original action space dtype
+        :return: ``np.float32`` if the dtype was float64,
+            the original dtype otherwise.
+        """
+        if reward.dtype == np.float64:
+            return reward.astype(np.float32)
+        return reward
⚠️ Potential issue

Fix docstring parameter and add input validation.

The implementation looks good but has a few minor issues:

  1. The docstring refers to a dtype parameter that doesn't exist
  2. Missing input type validation

Apply these changes:

 @staticmethod
 def _maybe_cast_reward(reward: np.ndarray) -> np.ndarray:
     """
     Cast `np.float64` reward datatype to `np.float32`,
     keep the others dtype unchanged.

-    :param dtype: The original action space dtype
+    :param reward: The reward array to potentially cast
     :return: ``np.float32`` if the dtype was float64,
         the original dtype otherwise.
     """
+    if not isinstance(reward, np.ndarray):
+        raise TypeError(f"Expected numpy array, got {type(reward)}")
     if reward.dtype == np.float64:
         return reward.astype(np.float32)
     return reward
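A quick check of the casting behaviour with made-up rewards (assuming this branch is installed):

    import numpy as np
    from stable_baselines3.common.vec_env import VecNormalize

    rewards = np.array([1.0, -0.5], dtype=np.float64)  # hypothetical float64 rewards
    assert VecNormalize._maybe_cast_reward(rewards).dtype == np.float32
    ints = np.array([1, 2])  # non-float64 dtypes pass through unchanged
    assert VecNormalize._maybe_cast_reward(ints).dtype == ints.dtype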


     def __getstate__(self) -> Dict[str, Any]:
         """
         Gets state for pickling.
@@ -254,7 +268,8 @@ def normalize_reward(self, reward: np.ndarray) -> np.ndarray:
         """
         if self.norm_reward:
             reward = np.clip(reward / np.sqrt(self.ret_rms.var + self.epsilon), -self.clip_reward, self.clip_reward)
-        return reward
+
+        return self._maybe_cast_reward(reward)

     def unnormalize_obs(self, obs: Union[np.ndarray, Dict[str, np.ndarray]]) -> Union[np.ndarray, Dict[str, np.ndarray]]:
         # Avoid modifying by reference the original object
3 changes: 3 additions & 0 deletions tests/test_spaces.py
@@ -4,6 +4,7 @@
 import gymnasium as gym
 import numpy as np
 import pytest
+import torch as th
 from gymnasium import spaces
 from gymnasium.spaces.space import Space

@@ -151,6 +152,8 @@ def test_discrete_obs_space(model_class, env):
     ],
 )
 def test_float64_action_space(model_class, obs_space, action_space):
+    if hasattr(th, "backends") and th.backends.mps.is_built():
+        pytest.skip("MPS framework doesn't support float64")
Comment on lines +155 to +156
🛠️ Refactor suggestion

Improve MPS availability check and skip message

The current implementation has several areas for improvement:

  1. Use is_available() instead of is_built() to accurately check MPS availability
  2. Make the skip message more informative about the specific limitation
  3. Simplify the condition structure

Apply this diff to improve the implementation:

-    if hasattr(th, "backends") and th.backends.mps.is_built():
-        pytest.skip("MPS framework doesn't support float64")
+    if hasattr(th.backends, "mps") and th.backends.mps.is_available():
+        pytest.skip("Skipping float64 tests: MPS backend does not support float64 dtype operations")

This change:

  • Uses the recommended is_available() check
  • Provides a clearer skip message explaining why the test is skipped
  • Simplifies the condition by checking hasattr(th.backends, "mps") directly
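For reference, a sketch of the built-versus-available distinction driving this suggestion:

    import torch as th

    # is_built(): this PyTorch binary was compiled with MPS support
    # is_available(): the current machine can actually use MPS right now
    print(th.backends.mps.is_built(), th.backends.mps.is_available())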

     env = DummyEnv(obs_space, action_space)
     env = gym.wrappers.TimeLimit(env, max_episode_steps=200)
     if isinstance(env.observation_space, spaces.Dict):
3 changes: 2 additions & 1 deletion tests/test_utils.py
@@ -442,9 +442,10 @@ def test_get_system_info():
     assert info["Stable-Baselines3"] == str(sb3.__version__)
     assert "Python" in info_str
     assert "PyTorch" in info_str
-    assert "GPU Enabled" in info_str
+    assert "Accelerator" in info_str
     assert "Numpy" in info_str
     assert "Gym" in info_str
     assert "Cloudpickle" in info_str


def test_is_vectorized_observation():