diff --git a/d4rl/locomotion/__init__.py b/d4rl/locomotion/__init__.py index e87d0974..bbdd3905 100644 --- a/d4rl/locomotion/__init__.py +++ b/d4rl/locomotion/__init__.py @@ -29,7 +29,7 @@ 'maze_map': maze_env.U_MAZE_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_new/Ant_maze_u-maze_noisy_multistart_False_multigoal_False_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -46,7 +46,7 @@ 'maze_map': maze_env.U_MAZE_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_new/Ant_maze_u-maze_noisy_multistart_True_multigoal_True_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -63,7 +63,7 @@ 'maze_map': maze_env.BIG_MAZE_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_new/Ant_maze_big-maze_noisy_multistart_True_multigoal_False_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -80,7 +80,7 @@ 'maze_map': maze_env.BIG_MAZE_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_new/Ant_maze_big-maze_noisy_multistart_True_multigoal_True_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -97,7 +97,7 @@ 'maze_map': maze_env.HARDEST_MAZE_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_new/Ant_maze_hardest-maze_noisy_multistart_True_multigoal_True_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -114,7 +114,7 @@ 'maze_map': maze_env.HARDEST_MAZE_TEST, 'reward_type':'sparse', 
'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_new/Ant_maze_hardest-maze_noisy_multistart_True_multigoal_False_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -131,7 +131,7 @@ 'maze_map': maze_env.U_MAZE_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_v1/Ant_maze_umaze_noisy_multistart_False_multigoal_False_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -148,7 +148,7 @@ 'maze_map': maze_env.U_MAZE_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_v1/Ant_maze_umaze_noisy_multistart_True_multigoal_True_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -165,7 +165,7 @@ 'maze_map': maze_env.BIG_MAZE_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_v1/Ant_maze_medium_noisy_multistart_True_multigoal_False_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -182,7 +182,7 @@ 'maze_map': maze_env.BIG_MAZE_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_v1/Ant_maze_medium_noisy_multistart_True_multigoal_True_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -199,7 +199,7 @@ 'maze_map': maze_env.HARDEST_MAZE_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_v1/Ant_maze_large_noisy_multistart_True_multigoal_True_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -216,7 +216,7 @@ 'maze_map': 
maze_env.HARDEST_MAZE_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_v1/Ant_maze_large_noisy_multistart_True_multigoal_False_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -232,7 +232,7 @@ 'maze_map': maze_env.U_MAZE_EVAL_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_new/Ant_maze_umaze_eval_noisy_multistart_True_multigoal_False_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -248,7 +248,7 @@ 'maze_map': maze_env.U_MAZE_EVAL_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_new/Ant_maze_umaze_eval_noisy_multistart_True_multigoal_True_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -264,7 +264,7 @@ 'maze_map': maze_env.BIG_MAZE_EVAL_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_new/Ant_maze_medium_eval_noisy_multistart_True_multigoal_True_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -280,7 +280,7 @@ 'maze_map': maze_env.BIG_MAZE_EVAL_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_new/Ant_maze_medium_eval_noisy_multistart_True_multigoal_False_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -296,7 +296,7 @@ 'maze_map': maze_env.HARDEST_MAZE_EVAL_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_new/Ant_maze_large_eval_noisy_multistart_True_multigoal_False_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 
'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -312,7 +312,7 @@ 'maze_map': maze_env.HARDEST_MAZE_EVAL_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_new/Ant_maze_large_eval_noisy_multistart_True_multigoal_True_sparse.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -329,7 +329,7 @@ 'maze_map': maze_env.U_MAZE_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_v2/Ant_maze_u-maze_noisy_multistart_False_multigoal_False_sparse_fixed.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -346,7 +346,7 @@ 'maze_map': maze_env.U_MAZE_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_v2/Ant_maze_u-maze_noisy_multistart_True_multigoal_True_sparse_fixed.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -363,7 +363,7 @@ 'maze_map': maze_env.BIG_MAZE_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_v2/Ant_maze_big-maze_noisy_multistart_True_multigoal_False_sparse_fixed.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -380,7 +380,7 @@ 'maze_map': maze_env.BIG_MAZE_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_v2/Ant_maze_big-maze_noisy_multistart_True_multigoal_True_sparse_fixed.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -397,7 +397,7 @@ 'maze_map': maze_env.HARDEST_MAZE_TEST, 'reward_type':'sparse', 
'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_v2/Ant_maze_hardest-maze_noisy_multistart_True_multigoal_True_sparse_fixed.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, @@ -414,7 +414,41 @@ 'maze_map': maze_env.HARDEST_MAZE_TEST, 'reward_type':'sparse', 'dataset_url':'http://rail.eecs.berkeley.edu/datasets/offline_rl/ant_maze_v2/Ant_maze_hardest-maze_noisy_multistart_True_multigoal_False_sparse_fixed.hdf5', - 'non_zero_reset':False, + 'non_zero_reset':False, + 'eval':True, + 'maze_size_scaling': 4.0, + 'ref_min_score': 0.0, + 'ref_max_score': 1.0, + 'v2_resets': True, + } +) + +register( + id='antmaze-ultra-diverse-v2', + entry_point='d4rl.locomotion.ant:make_ant_maze_env', + max_episode_steps=2000, + kwargs={ + 'maze_map': maze_env.ULTRA_MAZE_TEST, + 'reward_type':'sparse', + 'non_zero_reset':False, + 'dataset_url':'https://github.com/ZhengyaoJiang/d4rl/releases/download/public/Ant_maze_ultra_noisy_multistart_True_multigoal_True_sparse.hdf5', + 'eval':True, + 'maze_size_scaling': 4.0, + 'ref_min_score': 0.0, + 'ref_max_score': 1.0, + 'v2_resets': True, + } +) + +register( + id='antmaze-ultra-play-v2', + entry_point='d4rl.locomotion.ant:make_ant_maze_env', + max_episode_steps=2000, + kwargs={ + 'maze_map': maze_env.ULTRA_MAZE_TEST, + 'reward_type':'sparse', + 'non_zero_reset':False, + 'dataset_url':'https://github.com/ZhengyaoJiang/d4rl/releases/download/public/Ant_maze_ultra_noisy_multistart_True_multigoal_False_sparse.hdf5', 'eval':True, 'maze_size_scaling': 4.0, 'ref_min_score': 0.0, diff --git a/d4rl/locomotion/maze_env.py b/d4rl/locomotion/maze_env.py index c6010f2b..140f7fd0 100644 --- a/d4rl/locomotion/maze_env.py +++ b/d4rl/locomotion/maze_env.py @@ -52,6 +52,22 @@ [1, 0, 0, 1, G, 0, G, 1, 0, G, 0, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]] +ULTRA_MAZE = [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, R, 0, 0, G, 0, 0, 0, 1, 0, 0, G, 0, 0, G, 1], + 
[1, 0, 1, 1, 1, G, 1, 0, 1, G, 1, 1, 0, 1, 0, 1], + [1, 0, 1, 1, 1, 0, 1, G, 0, 0, 0, 1, 0, 1, 0, 1], + [1, 0, 0, G, 1, 0, 1, 1, 0, 1, 1, 1, G, 1, 0, 1], + [1, G, 1, 0, 0, 0, 1, G, 0, 0, G, 0, 0, 0, G, 1], + [1, 0, 0, 0, 1, G, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1], + [1, 0, 1, G, 1, 1, 1, 0, 1, 0, 0, G, 1, 0, 1, 1], + [1, G, 0, 0, 0, 0, 0, G, 1, 0, 1, 0, 0, G, 0, 1], + [1, 1, 0, 1, 1, G, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1], + [1, 1, 0, 1, 0, 0, 1, 0, 1, G, 0, 0, 0, 0, G, 1], + [1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1]] + + + + # Maze specifications with a single target goal U_MAZE_TEST = [[1, 1, 1, 1, 1], [1, R, 0, 0, 1], @@ -59,6 +75,7 @@ [1, G, 0, 0, 1], [1, 1, 1, 1, 1]] +# effective size 6*6 BIG_MAZE_TEST = [[1, 1, 1, 1, 1, 1, 1, 1], [1, R, 0, 1, 1, 0, 0, 1], [1, 0, 0, 1, 0, 0, 0, 1], @@ -68,6 +85,7 @@ [1, 0, 0, 0, 1, 0, G, 1], [1, 1, 1, 1, 1, 1, 1, 1]] +# effective size 7*10 HARDEST_MAZE_TEST = [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, R, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1], [1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1], @@ -78,6 +96,21 @@ [1, 0, 0, 1, 0, 0, 0, 1, 0, G, 0, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]] +# effective size 14*14 -> 10*14 +ULTRA_MAZE_TEST = [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, R, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1], + [1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1], + [1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1], + [1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1], + [1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1], + [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1], + [1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1], + [1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1], + [1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1], + [1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, G, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]] + + # Maze specifications for evaluation U_MAZE_EVAL = [[1, 1, 1, 1, 1], [1, 0, 0, R, 1], @@ -208,8 +241,8 @@ def __init__( def _xy_to_rowcol(self, xy): size_scaling = self._maze_size_scaling xy = (max(xy[0], 
1e-4), max(xy[1], 1e-4)) - return (int(1 + (xy[1]) / size_scaling), - int(1 + (xy[0]) / size_scaling)) + return (int((xy[1]+self._init_torso_y+0.25*size_scaling) / size_scaling), + int((xy[0]+self._init_torso_x+0.25*size_scaling) / size_scaling)) def _get_reset_location(self,): prob = (1.0 - self._np_maze_map) / np.sum(1.0 - self._np_maze_map) @@ -226,11 +259,11 @@ def _get_reset_location(self,): def _rowcol_to_xy(self, rowcol, add_random_noise=False): row, col = rowcol - x = col * self._maze_size_scaling - self._init_torso_x - y = row * self._maze_size_scaling - self._init_torso_y + x = col * self._maze_size_scaling - self._init_torso_x - 0.125*self._maze_size_scaling + y = row * self._maze_size_scaling - self._init_torso_y - 0.125*self._maze_size_scaling if add_random_noise: - x = x + np.random.uniform(low=0, high=self._maze_size_scaling * 0.25) - y = y + np.random.uniform(low=0, high=self._maze_size_scaling * 0.25) + x = x + np.random.uniform(low=0, high=self._maze_size_scaling * 0.25) - 0.125*self._maze_size_scaling + y = y + np.random.uniform(low=0, high=self._maze_size_scaling * 0.25) - 0.125*self._maze_size_scaling return (x, y) def goal_sampler(self, np_random, only_free_cells=True, interpolate=True): @@ -313,6 +346,8 @@ def _get_best_next_rowcol(self, current_rowcol, target_rowcol): visited = {} to_visit = [target_rowcol] + if self._maze_map[current_rowcol[0]][current_rowcol[1]] not in [0, RESET, GOAL]: + print() while to_visit: next_visit = [] for rowcol in to_visit: @@ -337,7 +372,7 @@ def _get_best_next_rowcol(self, current_rowcol, target_rowcol): next_visit.append(next_rowcol) to_visit = next_visit - raise ValueError('No path found to target.') + raise ValueError(f'No path found from {current_rowcol} to target {target_rowcol}.') def create_navigation_policy(self, goal_reaching_policy_fn, diff --git a/scripts/generation/generate_ant_maze_datasets.py b/scripts/generation/generate_ant_maze_datasets.py index 6c567d5e..3b06a4b9 100644 --- 
a/scripts/generation/generate_ant_maze_datasets.py +++ b/scripts/generation/generate_ant_maze_datasets.py @@ -75,6 +75,8 @@ def main(): maze = maze_env.BIG_MAZE elif args.maze == 'large': maze = maze_env.HARDEST_MAZE + elif args.maze == 'ultra': + maze = maze_env.ULTRA_MAZE elif args.maze == 'umaze_eval': maze = maze_env.U_MAZE_EVAL elif args.maze == 'medium_eval': @@ -124,7 +126,13 @@ def _goal_reaching_policy_fn(obs, goal): ts = 0 num_episodes = 0 for _ in range(args.num_samples): - act, waypoint_goal = data_collection_policy(s) + try: + act, waypoint_goal = data_collection_policy(s) + except Exception as e: + print(e) + #curr_frame = env.physics.render(width=500, height=500, depth=False) + #frames = np.array([curr_frame]) + #save_video('./videos/', args.env + '_navigation', frames, num_episodes) if args.noisy: act = act + np.random.randn(*act.shape)*0.2