diff --git a/d4rl/locomotion/ant.py b/d4rl/locomotion/ant.py index 8b1f2923..b9f7c0a1 100644 --- a/d4rl/locomotion/ant.py +++ b/d4rl/locomotion/ant.py @@ -204,8 +204,8 @@ def reset(self): def set_target(self, target_location=None): return self.set_target_goal(target_location) - def seed(self, seed=0): - mujoco_env.MujocoEnv.seed(self, seed) + def seed(self, seed=None): + mujoco_env.MujocoEnv.seed(self, seed) def make_ant_maze_env(**kwargs): env = AntMazeEnv(**kwargs) diff --git a/d4rl/locomotion/maze_env.py b/d4rl/locomotion/maze_env.py index c6010f2b..1027a6fc 100644 --- a/d4rl/locomotion/maze_env.py +++ b/d4rl/locomotion/maze_env.py @@ -214,13 +214,13 @@ def _xy_to_rowcol(self, xy): def _get_reset_location(self,): prob = (1.0 - self._np_maze_map) / np.sum(1.0 - self._np_maze_map) prob_row = np.sum(prob, 1) - row_sample = np.random.choice(np.arange(self._np_maze_map.shape[0]), p=prob_row) - col_sample = np.random.choice(np.arange(self._np_maze_map.shape[1]), p=prob[row_sample] * 1.0 / prob_row[row_sample]) + row_sample = self.np_random.choice(np.arange(self._np_maze_map.shape[0]), p=prob_row) + col_sample = self.np_random.choice(np.arange(self._np_maze_map.shape[1]), p=prob[row_sample] * 1.0 / prob_row[row_sample]) reset_location = self._rowcol_to_xy((row_sample, col_sample)) # Add some random noise - random_x = np.random.uniform(low=0, high=0.5) * 0.5 * self._maze_size_scaling - random_y = np.random.uniform(low=0, high=0.5) * 0.5 * self._maze_size_scaling + random_x = self.np_random.uniform(low=0, high=0.5) * 0.5 * self._maze_size_scaling + random_y = self.np_random.uniform(low=0, high=0.5) * 0.5 * self._maze_size_scaling return (max(reset_location[0] + random_x, 0), max(reset_location[1] + random_y, 0)) @@ -229,8 +229,8 @@ def _rowcol_to_xy(self, rowcol, add_random_noise=False): x = col * self._maze_size_scaling - self._init_torso_x y = row * self._maze_size_scaling - self._init_torso_y if add_random_noise: - x = x + np.random.uniform(low=0, high=self._maze_size_scaling * 0.25) - y = y + np.random.uniform(low=0, high=self._maze_size_scaling * 0.25) + x = x + self.np_random.uniform(low=0, high=self._maze_size_scaling * 0.25) + y = y + self.np_random.uniform(low=0, high=self._maze_size_scaling * 0.25) return (x, y) def goal_sampler(self, np_random, only_free_cells=True, interpolate=True): @@ -247,11 +247,11 @@ def goal_sampler(self, np_random, only_free_cells=True, interpolate=True): # If there is a 'goal' designated, use that. Otherwise, any valid cell can # be a goal. sample_choices = goal_cells if goal_cells else valid_cells - cell = sample_choices[np_random.choice(len(sample_choices))] + cell = sample_choices[self.np_random.choice(len(sample_choices))] xy = self._rowcol_to_xy(cell, add_random_noise=True) - random_x = np.random.uniform(low=0, high=0.5) * 0.25 * self._maze_size_scaling - random_y = np.random.uniform(low=0, high=0.5) * 0.25 * self._maze_size_scaling + random_x = self.np_random.uniform(low=0, high=0.5) * 0.25 * self._maze_size_scaling + random_y = self.np_random.uniform(low=0, high=0.5) * 0.25 * self._maze_size_scaling xy = (max(xy[0] + random_x, 0), max(xy[1] + random_y, 0)) @@ -259,7 +259,7 @@ def goal_sampler(self, np_random, only_free_cells=True, interpolate=True): def set_target_goal(self, goal_input=None): if goal_input is None: - self.target_goal = self.goal_sampler(np.random) + self.target_goal = self.goal_sampler(self.np_random) else: self.target_goal = goal_input diff --git a/d4rl/locomotion/wrappers.py b/d4rl/locomotion/wrappers.py index 45b371cd..6d6d2173 100644 --- a/d4rl/locomotion/wrappers.py +++ b/d4rl/locomotion/wrappers.py @@ -22,6 +22,9 @@ def reset(self, **kwargs): def step(self, action): return self._wrapped_env.step(action) + + def seed(self, seed=None): + return self._wrapped_env.seed(seed) def render(self, *args, **kwargs): return self._wrapped_env.render(*args, **kwargs)