Skip to content

Commit

Permalink
Adding tests with pos rew x5 and some vel and angle rew
Browse files Browse the repository at this point in the history
  • Loading branch information
pizarrob committed Feb 29, 2024
1 parent 4835f55 commit 4544f4d
Show file tree
Hide file tree
Showing 179 changed files with 2,930 additions and 8 deletions.
4 changes: 2 additions & 2 deletions experiments/crazyflie/config_overrides/crazyflie_track.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ task_config:
disturbances:
observation:
- disturbance_func: white_noise
std: 0.003
std: 0.002
dynamics:
- disturbance_func: white_noise
std: 0.3
std: 0.2
3 changes: 2 additions & 1 deletion experiments/crazyflie/crazyflie_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,8 @@ def run(gui=False, plot=False, training=False, certify=True, curr_path='.', num_
def get_reward(obs, info, traj):
wp_idx = min(info['current_step']//20, traj.shape[0] - 1) # +1 because state has already advanced but counter not incremented.
state_error = obs[:4] - traj[wp_idx]
dist = np.sum(np.array([2, 0, 2, 0]) * state_error * state_error)
dist = np.sum(np.array([5, 0, 5, 0]) * state_error * state_error)
dist += np.sum(obs[[1,3,6,7]]*obs[[1,3,6,7]])
rew = -dist
rew = np.exp(rew)

Expand Down
246 changes: 246 additions & 0 deletions experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
algo: ppo
algo_config:
activation: tanh
actor_lr: 0.001
clip_obs: 10
clip_param: 0.2
clip_reward: 10
critic_lr: 0.001
deque_size: 10
entropy_coef: 0.01
eval_batch_size: 10
eval_interval: 10000
eval_save_best: true
filter_train_actions: true
gae_lambda: 0.95
gamma: 0.99
hidden_dim: 128
log_interval: 10000
max_env_steps: 250000
max_grad_norm: 0.5
mini_batch_size: 256
norm_obs: false
norm_reward: false
num_checkpoints: 0
num_workers: 1
opt_epochs: 20
penalize_sf_diff: true
rollout_batch_size: 1
rollout_steps: 1000
save_interval: 0
sf_penalty: 0.1
target_kl: 0.01
tensorboard: false
training: true
use_clipped_value: false
use_gae: true
use_safe_reset: true
device: cpu
kv_overrides:
- sf_config.cost_function=one_step_cost
- algo_config.filter_train_actions=True
- algo_config.penalize_sf_diff=True
- algo_config.use_safe_reset=True
- algo_config.sf_penalty=0.1
- task_config.use_constraint_penalty=False
output_dir: ./models/rl_models/ppo/mpsf_0.1_dm_t2
overrides:
- ./config_overrides/crazyflie_track.yaml
- ./config_overrides/ppo_crazyflie.yaml
- ./config_overrides/nl_mpsc.yaml
restore: null
safety_filter: nl_mpsc
seed: null
sf_config:
cost_function: one_step_cost
decay_factor: 0.85
horizon: 10
integration_algo: rk4
mpsc_cost_horizon: 5
n_samples: 600
prior_info:
prior_prop: null
prior_prop_rand_info: null
randomize_prior_prop: false
q_lin:
- 0.008
- 1.85
- 0.008
- 1.85
- 10
- 10
r_lin:
- 2
use_acados: true
use_terminal_set: false
warmstart: true
tag: temp
task: quadrotor
task_config:
adversary_disturbance: null
adversary_disturbance_offset: 0.0
adversary_disturbance_scale: 0.01
camera_view:
- 5
- -40
- -40
- 0.5
- -1
- 0.5
constraint_penalty: -1
constraints:
- active_dims:
- 0
- 1
- 2
- 3
- 6
- 7
constrained_variable: state
constraint_form: bounded_constraint
lower_bounds:
- -0.95
- -2
- -0.95
- -2
- -0.25
- -0.25
upper_bounds:
- 0.95
- 2
- 0.95
- 2
- 0.25
- 0.25
- constrained_variable: input
constraint_form: default_constraint
cost: quadratic
ctrl_freq: 500
disturbances:
dynamics:
- disturbance_func: white_noise
std: 0.2
observation:
- disturbance_func: white_noise
std: 0.002
done_on_out_of_bound: true
done_on_violation: false
episode_len_sec: 15
gui: false
inertial_prop:
Ixx: 1.4e-05
Iyy: 1.4e-05
Izz: 2.17e-05
M: 0.0345
inertial_prop_randomization_info:
Ixx:
distrib: uniform
high: 1.0e-06
low: -1.0e-06
Iyy:
distrib: uniform
high: 1.0e-06
low: -1.0e-06
Izz:
distrib: uniform
high: 1.0e-06
low: -1.0e-06
M:
distrib: uniform
high: 0.0025
low: -0.0025
info_in_reset: true
init_state:
init_p: 0
init_phi: 0
init_psi: 0
init_q: 0
init_r: 0
init_theta: 0
init_x: 0
init_x_dot: 0
init_y: 0
init_y_dot: 0
init_z: 1
init_z_dot: 0
init_state_randomization_info:
init_p:
distrib: uniform
high: 0.5
low: -0.5
init_phi:
distrib: uniform
high: 0.25
low: -0.25
init_psi:
distrib: uniform
high: 0
low: 0
init_q:
distrib: uniform
high: 0.5
low: -0.5
init_r:
distrib: uniform
high: 0
low: 0
init_theta:
distrib: uniform
high: 0.25
low: -0.25
init_x:
distrib: uniform
high: 0.95
low: -0.95
init_x_dot:
distrib: uniform
high: 2
low: -2
init_y:
distrib: uniform
high: 0.95
low: -0.95
init_y_dot:
distrib: uniform
high: 2
low: -2
init_z:
distrib: uniform
high: 1
low: 1
init_z_dot:
distrib: uniform
high: 0
low: 0
norm_act_scale: 0.1
normalized_rl_action_space: false
obs_goal_horizon: 0
physics: pyb
pyb_freq: 1000
quad_type: 3
randomized_inertial_prop: true
randomized_init: false
rew_act_weight: 0.0001
rew_exponential: true
rew_state_weight: 1.0
seed: 1337
task: traj_tracking
task_info:
num_cycles: 1
proj_normal:
- 0
- 1
- 1
proj_point:
- 0
- 0
- 0.5
trajectory_plane: xz
trajectory_position_offset:
- 0
- 1
trajectory_scale: 1
trajectory_type: figure8
use_constraint_penalty: false
verbose: false
use_gpu: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
step,loss/approx_kl
10000,0.03629405741036559
20000,0.014158800469400984
30000,0.00940396550189083
40000,0.01859933050194134
50000,0.023677004485701522
60000,0.013056748282785219
70000,0.010825660487171263
80000,0.010031700010101001
90000,0.008447419452325751
100000,0.0097706533735618
110000,0.01536899118218571
120000,0.02015554302682479
130000,0.009140072914306073
140000,0.01214529280550778
150000,0.014896025857888162
160000,0.014774406371482956
170000,0.03351424749319752
180000,0.020932470044742024
190000,0.010564974047398817
200000,0.011176506569609047
210000,0.025748851185198873
220000,0.013600206406166156
230000,0.019517522285847612
240000,0.02317161182872951
250000,0.017965512090207386
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
step,loss/entropy_loss
10000,-1.860471185048421
20000,-1.8969107687473297
30000,-1.9668918271859486
40000,-1.9282902201016738
50000,-1.9003465255101524
60000,-1.9130737761656444
70000,-1.9386168360710143
80000,-1.9204576333363854
90000,-1.801043504476547
100000,-1.6914923290411632
110000,-1.644024852911631
120000,-1.6197641968727112
130000,-1.6847192347049713
140000,-1.6943771878878278
150000,-1.7249806304772695
160000,-1.7251652995745341
170000,-1.6844193081061043
180000,-1.6953525960445404
190000,-1.667886586983999
200000,-1.699438379208247
210000,-1.6885322888692222
220000,-1.6554879188537597
230000,-1.5087747514247893
240000,-1.5377500136693318
250000,-1.4668552458286286
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
step,loss/policy_loss
10000,-0.0046946849570197536
20000,-0.00837196960013564
30000,-0.009198114681110026
40000,-0.01203073263087373
50000,-0.006172231929076494
60000,-0.010109381468762262
70000,-0.0020931236373119533
80000,-0.018488799211649386
90000,-0.0031121322207710933
100000,-0.003763172075328054
110000,-0.008179799615605495
120000,-0.008566143801081099
130000,-0.019973077587651978
140000,-0.005292802399177931
150000,-0.009406493863578218
160000,-0.008272437623207462
170000,-0.01026959637728307
180000,-0.011772444065824148
190000,-0.008092452344120659
200000,-0.004798340739460206
210000,-0.0009689785466268991
220000,-0.018449735918894934
230000,-0.009661653641431922
240000,-0.0014209499504723388
250000,-0.006043377444478433
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
step,loss/value_loss
10000,0.8204153413536313
20000,1.6596146752059808
30000,0.5423766622529854
40000,0.6714325899721156
50000,1.6261081726258937
60000,1.7673258683733022
70000,1.7415680847180919
80000,1.7207232450278585
90000,2.746758267532083
100000,3.4835939802467073
110000,2.4956888027752315
120000,0.978582906681362
130000,1.413058683637044
140000,1.6709326064324324
150000,0.8909431496528339
160000,1.4910106884557064
170000,0.6128571866074364
180000,2.394677497472828
190000,0.39955144068614346
200000,1.6228412898725524
210000,0.8637223169056474
220000,0.23593890865875858
230000,0.7480453868748183
240000,2.229012933263951
250000,9.441217708943952
Loading

0 comments on commit 4544f4d

Please sign in to comment.