Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Support for robosuite environments #3

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.DS_Store

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
8 changes: 8 additions & 0 deletions cfg/behavior_policies/iris.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
teach_behavior_policy: acteach
use_meta_target: False

behavior_policy_params:
commitment_thresh: 0.6
with_commitment: True
use_learner: True
policy_choice_repeat: 30 # added to allow for policy to act for a longer period of time
28 changes: 28 additions & 0 deletions cfg/behavior_policies/iris_dqn.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
teach_behavior_policy: dqn

behavior_policy_params:
commit_time: 0
do_exploration: True
num_timesteps: 100000
policy_choice_repeat: 30

dqn_params:
gamma: 0.99
learning_rate: 5.0e-4
buffer_size: 100000
exploration_fraction: 0.1
exploration_final_eps: 0.02
train_freq: 10
batch_size: 32
checkpoint_freq: 10000
checkpoint_path: null
learning_starts: 100
target_network_update_freq: 1000
prioritized_replay: False
prioritized_replay_alpha: 0.6
prioritized_replay_beta0: 0.4
prioritized_replay_beta_iters: 3000000
prioritized_replay_eps: 1.0e-6
param_noise: False
verbose: 1
full_tensorboard_log: False
9 changes: 9 additions & 0 deletions cfg/behavior_policies/iris_ep.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
teach_behavior_policy: acteach
use_meta_target: False

behavior_policy_params:
commitment_thresh: 0.6
with_commitment: True
use_learner: True
policy_choice_repeat: 30
random_episode_choice: True
39 changes: 39 additions & 0 deletions cfg/sawyer_can/base/train.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
env_id: SawyerPickPlaceCan
learner_type: DDPG
render: False
render_eval: False
normalize_returns: False
normalize_observations: False
seed: next
tau: 0.001
critic_l2_reg: 0.0
batch_size: 128 # per MPI worker
actor_lr: 0.0001
critic_lr: 0.001
enable_popart: False
gamma: 0.99
reward_scale: 1
clip_norm: null
noise_type: normal_0.2 # choices are adaptive-param_xx, ou_xx, normal_xx, none
load_path: null

memory_limit: 1000000
nb_train_steps: 50 # per epoch cycle and MPI worker
nb_rollout_steps: 1000 #200 # per epoch cycle and MPI worker
num_timesteps: 500000
nb_eval_steps: 1000 #100 # per epoch cycle and MPI worker
log_interval: 25
verbose: 1
do_eval: True

use_meta_target: False
teach_behavior_policy: null

policy_kwargs:
layer_norm: True
layers: [64, 64, 64]
feature_extraction: mlp # Can be mlp or cnn

env_params:
shuffle_order: True
render_q_quiver: True
41 changes: 41 additions & 0 deletions cfg/sawyer_can/base/train_dropout.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
env_id: SawyerPickPlaceCan
learner_type: Dropout DDPG
render: False
render_eval: False
normalize_returns: False
normalize_observations: False
seed: next
critic_l2_reg: 0.0
action_l2: 0.1
tau: 0.001
batch_size: 128 # per MPI worker
actor_lr: 0.0001
critic_lr: 0.001
enable_popart: False
gamma: 0.99
reward_scale: 1
clip_norm: null
noise_type: normal_0.2 # choices are adaptive-param_xx, ou_xx, normal_xx, none
load_path: null

memory_limit: 1000000
nb_train_steps: 50 # per epoch cycle and MPI worker
nb_rollout_steps: 1000 #200 # per epoch cycle and MPI worker
num_timesteps: 500000
nb_eval_steps: 1000 #100 # per epoch cycle and MPI worker
log_interval: 25
verbose: 1
do_eval: True

use_meta_target: False
teach_behavior_policy: null

dropout_tau: 10.0
include_mc_stats: True

policy_kwargs:
dropout_keep_prob: 0.9
layers: [64, 64, 64]
mc_samples: 50
layer_norm: True
feature_extraction: mlp # Can be mlp or cnn
15 changes: 15 additions & 0 deletions cfg/sawyer_can/eval.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
include:
- cfg/sawyer_can/base/train_dropout.yaml

env_id: SawyerPickPlaceCan
render: True
render_eval: True
noise_type: normal_0.0000001 # can try null here, may need a bit of debugging

load_path: tmp/model_hour_11.pkl
nb_train_steps: 0 # per epoch cycle and MPI worker
nb_rollout_steps: 200 # per epoch cycle and MPI worker
nb_eval_steps: 200 # per epoch cycle and MPI worker
log_interval: 1
verbose: 1
do_eval: True
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_can/base/train.yaml
- cfg/sawyer_can/teachers/full_optimal.yaml
- cfg/behavior_policies/critic.yaml

experiment_name: efficiency_1full_optimal_ddpgcritic
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_can/base/train_dropout.yaml
- cfg/sawyer_can/teachers/full_optimal.yaml
- cfg/behavior_policies/dqn.yaml

experiment_name: efficiency_1full_optimal_dqn
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_can/base/train_dropout.yaml
- cfg/sawyer_can/teachers/full_optimal.yaml
- cfg/behavior_policies/iris.yaml

experiment_name: efficiency_1full_optimal_ours
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_can/base/train_dropout.yaml
- cfg/sawyer_can/teachers/full_optimal.yaml
- cfg/behavior_policies/acteach.yaml

experiment_name: efficiency_1full_optimal_acteach
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_can/base/train_dropout.yaml
- cfg/sawyer_can/teachers/full_optimal.yaml
- cfg/behavior_policies/random.yaml

experiment_name: efficiency_1full_optimal_random
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_can/base/train.yaml
- cfg/sawyer_can/teachers/full_suboptimal.yaml
- cfg/behavior_policies/critic.yaml

experiment_name: efficiency_1full_suboptimal_ddpgcritic
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_can/base/train_dropout.yaml
- cfg/sawyer_can/teachers/full_suboptimal.yaml
- cfg/behavior_policies/dqn.yaml

experiment_name: efficiency_1full_suboptimal_dqn
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_can/base/train_dropout.yaml
- cfg/sawyer_can/teachers/full_suboptimal.yaml
- cfg/behavior_policies/iris.yaml

experiment_name: efficiency_1full_suboptimal_ours
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_can/base/train_dropout.yaml
- cfg/sawyer_can/teachers/full_suboptimal.yaml
- cfg/behavior_policies/acteach.yaml

experiment_name: efficiency_1full_suboptimal_acteach
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_can/base/train_dropout.yaml
- cfg/sawyer_can/teachers/full_suboptimal.yaml
- cfg/behavior_policies/random.yaml

experiment_name: efficiency_1full_suboptimal_random
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
include:
- cfg/sawyer_can/base/train_dropout.yaml

experiment_name: efficiency_no_teachers_ddpg
4 changes: 4 additions & 0 deletions cfg/sawyer_can/teachers/full_optimal.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
teachers:
- type: optimal
  env: SawyerPickPlaceCan
  agent: "/home/robot/installed_libraries/batchRL/iris_trained_models/"
3 changes: 3 additions & 0 deletions cfg/sawyer_can/teachers/full_suboptimal.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
teachers:
- type: optimal
noise_type: normal_0.1
39 changes: 39 additions & 0 deletions cfg/sawyer_lift/base/train.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
env_id: SawyerLift
learner_type: DDPG
render: False
render_eval: False
normalize_returns: False
normalize_observations: False
seed: next
tau: 0.001
critic_l2_reg: 0.0
batch_size: 128 # per MPI worker
actor_lr: 0.0001
critic_lr: 0.001
enable_popart: False
gamma: 0.99
reward_scale: 1
clip_norm: null
noise_type: normal_0.2 # choices are adaptive-param_xx, ou_xx, normal_xx, none
load_path: null

memory_limit: 1000000
nb_train_steps: 50 # per epoch cycle and MPI worker
nb_rollout_steps: 1000 #200 # per epoch cycle and MPI worker
num_timesteps: 500000
nb_eval_steps: 1000 #100 # per epoch cycle and MPI worker
log_interval: 25
verbose: 1
do_eval: True

use_meta_target: False
teach_behavior_policy: null

policy_kwargs:
layer_norm: True
layers: [64, 64, 64]
feature_extraction: mlp # Can be mlp or cnn

env_params:
shuffle_order: True
render_q_quiver: True
41 changes: 41 additions & 0 deletions cfg/sawyer_lift/base/train_dropout.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
env_id: SawyerLift
learner_type: Dropout DDPG
render: False
render_eval: False
normalize_returns: False
normalize_observations: False
seed: next
critic_l2_reg: 0.0
action_l2: 0.1
tau: 0.001
batch_size: 128 # per MPI worker
actor_lr: 0.0001
critic_lr: 0.001
enable_popart: False
gamma: 0.99
reward_scale: 1
clip_norm: null
noise_type: normal_0.2 # choices are adaptive-param_xx, ou_xx, normal_xx, none
load_path: null

memory_limit: 1000000
nb_train_steps: 50 # per epoch cycle and MPI worker
nb_rollout_steps: 1000 #200 # per epoch cycle and MPI worker
num_timesteps: 500000
nb_eval_steps: 1000 #100 # per epoch cycle and MPI worker
log_interval: 25
verbose: 1
do_eval: True

use_meta_target: False
teach_behavior_policy: null

dropout_tau: 10.0
include_mc_stats: True

policy_kwargs:
dropout_keep_prob: 0.9
layers: [64, 64, 64]
mc_samples: 50
layer_norm: True
feature_extraction: mlp # Can be mlp or cnn
15 changes: 15 additions & 0 deletions cfg/sawyer_lift/eval.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
include:
- cfg/sawyer_lift/base/train_dropout.yaml

env_id: SawyerLift
render: True
render_eval: True
noise_type: normal_0.0000001 # can try null here, may need a bit of debugging

load_path: "/Users/ajaymandlekar/Desktop/Google Drive/Stanford/ccr/ac-teach/logs/SawyerLift/efficiency_no_teachers_ddpg/seed_0/model.pkl"
nb_train_steps: 0 # per epoch cycle and MPI worker
nb_rollout_steps: 1000 #200 # per epoch cycle and MPI worker
nb_eval_steps: 1000 #200 # per epoch cycle and MPI worker
log_interval: 1
verbose: 1
do_eval: True
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_lift/base/train.yaml
- cfg/sawyer_lift/teachers/full_optimal.yaml
- cfg/behavior_policies/critic.yaml

experiment_name: efficiency_1full_optimal_ddpgcritic
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_lift/base/train_dropout.yaml
- cfg/sawyer_lift/teachers/full_optimal.yaml
- cfg/behavior_policies/dqn.yaml

experiment_name: efficiency_1full_optimal_dqn
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_lift/base/train_dropout.yaml
- cfg/sawyer_lift/teachers/full_optimal.yaml
- cfg/behavior_policies/iris.yaml

experiment_name: efficiency_1full_optimal_ours
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_lift/base/train_dropout.yaml
- cfg/sawyer_lift/teachers/full_optimal.yaml
- cfg/behavior_policies/iris_dqn.yaml

experiment_name: efficiency_1full_optimal_iris_dqn
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_lift/base/train_dropout.yaml
- cfg/sawyer_lift/teachers/full_optimal.yaml
- cfg/behavior_policies/iris_ep.yaml

experiment_name: efficiency_1full_optimal_iris_ep
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_lift/base/train_dropout.yaml
- cfg/sawyer_lift/teachers/full_optimal.yaml
- cfg/behavior_policies/acteach.yaml

experiment_name: efficiency_1full_optimal_acteach
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_lift/base/train_dropout.yaml
- cfg/sawyer_lift/teachers/full_optimal.yaml
- cfg/behavior_policies/random.yaml

experiment_name: efficiency_1full_optimal_random
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_lift/base/train.yaml
- cfg/sawyer_lift/teachers/full_suboptimal.yaml
- cfg/behavior_policies/critic.yaml

experiment_name: efficiency_1full_suboptimal_ddpgcritic
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include:
- cfg/sawyer_lift/base/train_dropout.yaml
- cfg/sawyer_lift/teachers/full_suboptimal.yaml
- cfg/behavior_policies/dqn.yaml

experiment_name: efficiency_1full_suboptimal_dqn
Loading