From 069d089cd0aa67c6f64ffbccdd133f5f078988ad Mon Sep 17 00:00:00 2001
From: StoneT2000
Date: Mon, 22 Jan 2024 22:29:32 -0800
Subject: [PATCH] fix some batching code, only reset and step return numpy in cpu mode

---
 mani_skill2/envs/sapien_env.py      | 37 ++++++++++++++---------------
 mani_skill2/envs/tasks/push_cube.py |  1 +
 mani_skill2/utils/sapien_utils.py   |  9 +++----
 3 files changed, 22 insertions(+), 25 deletions(-)

diff --git a/mani_skill2/envs/sapien_env.py b/mani_skill2/envs/sapien_env.py
index f29377bfb..d43cbf1de 100644
--- a/mani_skill2/envs/sapien_env.py
+++ b/mani_skill2/envs/sapien_env.py
@@ -338,7 +338,7 @@ def get_obs(self):
         if physx.is_gpu_enabled():
             return obs
         else:
-            return unbatch(to_numpy(obs))
+            return unbatch(obs)
 
     def _get_obs_state_dict(self):
         """Get (ground-truth) state-based observations."""
@@ -414,16 +414,16 @@ def reward_mode(self):
 
     def get_reward(self, obs: Any, action: Array, info: Dict):
         if self._reward_mode == "sparse":
-            return to_tensor(info["success"])
+            reward = info["success"]
         elif self._reward_mode == "dense":
-            return self.compute_dense_reward(obs=obs, action=action, info=info)
+            reward = self.compute_dense_reward(obs=obs, action=action, info=info)
         elif self._reward_mode == "normalized_dense":
-            return self.compute_normalized_dense_reward(
+            reward = self.compute_normalized_dense_reward(
                 obs=obs, action=action, info=info
             )
         else:
             raise NotImplementedError(self._reward_mode)
-
+        return reward
 
     def compute_dense_reward(self, obs: Any, action: Array, info: Dict):
         raise NotImplementedError
@@ -545,12 +545,15 @@ def reset(self, seed=None, options=None):
             self._set_episode_rng(self._episode_seed)
 
         self.initialize_episode()
+        obs = self.get_obs()
         if physx.is_gpu_enabled():
             # ensure all updates to object poses and configurations are applied on GPU after task initialization
             self._scene._gpu_apply_all()
             self._scene.px.gpu_update_articulation_kinematics()
             self._scene._gpu_fetch_all()
-        return self.get_obs(), {}
+        else:
+            obs = to_numpy(obs)
+        return obs, {}
 
     def _set_main_rng(self, seed):
         """Set the main random generator (e.g., to generate the seed for each episode)."""
@@ -616,24 +619,17 @@ def _clear_sim_state(self):
 
     # -------------------------------------------------------------------------- #
     def step(self, action: Union[None, np.ndarray, Dict]):
-        with sapien.profile("step_action"):
-            self.step_action(action)
+        self.step_action(action)
         self._elapsed_steps += 1
-        # TODO (stao): I think evaluation should always occur first before generating observations
-        # as evaluation is more likely to use privileged information whereas observations only sometimes should include privileged information
-        with sapien.profile("get_obs"):
-            obs = self.get_obs()
+        obs = self.get_obs()
         info = self.get_info(obs=obs)
         reward = self.get_reward(obs=obs, action=action, info=info)
         terminated = info["success"]
-        if self.num_envs == 1:
-            terminated = terminated[0]
-            reward = reward[0]
-
         if physx.is_gpu_enabled():
             return obs, reward, terminated, torch.Tensor(False), info
         else:
-            return unbatch(obs, reward, terminated.item(), False, to_numpy(info))
+            # In CPU sim mode, we always return numpy / python primitives without any batching.
+            return unbatch(to_numpy(obs), to_numpy(reward), to_numpy(terminated), False, to_numpy(info))
 
     def step_action(self, action):
         set_action = False
@@ -674,8 +670,11 @@ def get_info(self, **kwargs):
         """
         info = dict(elapsed_steps=self._elapsed_steps)
         info.update(self.evaluate(**kwargs))
-        return info
-
+        if physx.is_gpu_enabled():
+            return info
+        else:
+            return unbatch(info)
+
     def _before_control_step(self):
         pass
 
diff --git a/mani_skill2/envs/tasks/push_cube.py b/mani_skill2/envs/tasks/push_cube.py
index 037287d97..79feefdde 100644
--- a/mani_skill2/envs/tasks/push_cube.py
+++ b/mani_skill2/envs/tasks/push_cube.py
@@ -38,6 +38,7 @@
 
 @register_env("PushCube-v0", max_episode_steps=50)
 class PushCubeEnv(BaseEnv):
+    # Specify some supported robot types
     agent: Union[Panda, Xmate3Robotiq]
 
     # set some commonly used values
diff --git a/mani_skill2/utils/sapien_utils.py b/mani_skill2/utils/sapien_utils.py
index fcb8141b0..646bca4fc 100644
--- a/mani_skill2/utils/sapien_utils.py
+++ b/mani_skill2/utils/sapien_utils.py
@@ -47,12 +47,9 @@ def to_tensor(array: Union[torch.Tensor, np.array, Sequence]):
 def _to_numpy(array: Union[Array, Sequence]) -> np.ndarray:
     if isinstance(array, (dict)):
         return {k: _to_numpy(v) for k, v in array.items()}
-    if isinstance(array, str):
-        return array
-    if torch is not None:
-        if isinstance(array, torch.Tensor):
-            return array.cpu().numpy()
-    if isinstance(array, np.ndarray):
+    if isinstance(array, torch.Tensor):
+        return array.cpu().numpy()
+    if isinstance(array, (np.ndarray, bool, str, float, int)):
         return array
     else:
         return np.array(array)
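
Note: below is a minimal usage sketch of the behavior this patch targets, not part of the patch itself. It assumes a Gymnasium-style API and that importing mani_skill2.envs registers the "PushCube-v0" environment touched above; treat the import path and registration wiring as assumptions.

    # Sketch: with CPU sim (GPU physx disabled), reset() and step() should now
    # return unbatched numpy arrays / plain Python values; with GPU sim enabled
    # they remain batched torch tensors on the device.
    import gymnasium as gym
    import mani_skill2.envs  # noqa: F401  (assumed to register the environments)

    env = gym.make("PushCube-v0")
    obs, info = env.reset(seed=0)
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    # In CPU sim mode these should print numpy / python types and a scalar step count.
    print(type(obs), type(reward), type(terminated), info["elapsed_steps"])
    env.close()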