Skip to content

Commit

Permalink
Trained each approach 5 times with 5 seeds
Browse files Browse the repository at this point in the history
  • Loading branch information
Federico-PizarroBejarano committed Jun 20, 2024
1 parent f83df4b commit e98b837
Show file tree
Hide file tree
Showing 1,490 changed files with 29,377 additions and 62 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
algo: ppo
algo_config:
activation: tanh
actor_lr: 0.001
clip_obs: 10
clip_param: 0.2
clip_reward: 10
critic_lr: 0.001
deque_size: 10
entropy_coef: 0.01
eval_batch_size: 10
eval_interval: 10000
eval_save_best: true
filter_train_actions: true
gae_lambda: 0.95
gamma: 0.99
hidden_dim: 128
log_interval: 10000
max_env_steps: 250000
max_grad_norm: 0.5
mini_batch_size: 256
norm_obs: false
norm_reward: false
num_checkpoints: 0
num_workers: 1
opt_epochs: 20
penalize_sf_diff: true
rollout_batch_size: 1
rollout_steps: 1000
save_interval: 0
seed: 4077
sf_penalty: 0.1
target_kl: 0.01
tensorboard: false
training: true
use_clipped_value: false
use_gae: true
use_safe_reset: true
device: cpu
kv_overrides:
- sf_config.cost_function=one_step_cost
- algo_config.filter_train_actions=True
- algo_config.penalize_sf_diff=True
- algo_config.use_safe_reset=True
- algo_config.sf_penalty=0.1
- task_config.use_constraint_penalty=False
- task_config.seed=4077
- algo_config.seed=4077
- sf_config.seed=4077
output_dir: ./models/rl_models/ppo/mpsf_0.1/seed_4077
overrides:
- ./config_overrides/crazyflie_track.yaml
- ./config_overrides/ppo_crazyflie.yaml
- ./config_overrides/nl_mpsc.yaml
restore: null
safety_filter: nl_mpsc
seed: null
sf_config:
cost_function: one_step_cost
decay_factor: 0.85
horizon: 10
integration_algo: rk4
mpsc_cost_horizon: 5
n_samples: 600
prior_info:
prior_prop: null
prior_prop_rand_info: null
randomize_prior_prop: false
q_lin:
- 0.8
- 1.85
- 0.8
- 1.85
- 10
- 10
r_lin:
- 2
seed: 4077
use_acados: true
use_terminal_set: false
warmstart: true
tag: temp
task: quadrotor
task_config:
adversary_disturbance: null
adversary_disturbance_offset: 0.0
adversary_disturbance_scale: 0.01
camera_view:
- 5
- -40
- -40
- 0.5
- -1
- 0.5
constraint_penalty: -1
constraints:
- active_dims:
- 0
- 1
- 2
- 3
- 6
- 7
constrained_variable: state
constraint_form: bounded_constraint
lower_bounds:
- -0.95
- -2
- -0.95
- -2
- -0.25
- -0.25
upper_bounds:
- 0.95
- 2
- 0.95
- 2
- 0.25
- 0.25
- constrained_variable: input
constraint_form: default_constraint
cost: quadratic
ctrl_freq: 500
disturbances: null
done_on_out_of_bound: true
done_on_violation: false
episode_len_sec: 15
gui: false
inertial_prop:
Ixx: 1.4e-05
Iyy: 1.4e-05
Izz: 2.17e-05
M: 0.0345
inertial_prop_randomization_info:
Ixx:
distrib: uniform
high: 1.0e-06
low: -1.0e-06
Iyy:
distrib: uniform
high: 1.0e-06
low: -1.0e-06
Izz:
distrib: uniform
high: 1.0e-06
low: -1.0e-06
M:
distrib: uniform
high: 0.0025
low: -0.0025
info_in_reset: true
init_state:
init_p: 0
init_phi: 0
init_psi: 0
init_q: 0
init_r: 0
init_theta: 0
init_x: 0
init_x_dot: 0
init_y: 0
init_y_dot: 0
init_z: 1
init_z_dot: 0
init_state_randomization_info:
init_p:
distrib: uniform
high: 0.5
low: -0.5
init_phi:
distrib: uniform
high: 0.25
low: -0.25
init_psi:
distrib: uniform
high: 0
low: 0
init_q:
distrib: uniform
high: 0.5
low: -0.5
init_r:
distrib: uniform
high: 0
low: 0
init_theta:
distrib: uniform
high: 0.25
low: -0.25
init_x:
distrib: uniform
high: 0.95
low: -0.95
init_x_dot:
distrib: uniform
high: 2
low: -2
init_y:
distrib: uniform
high: 0.95
low: -0.95
init_y_dot:
distrib: uniform
high: 2
low: -2
init_z:
distrib: uniform
high: 1
low: 1
init_z_dot:
distrib: uniform
high: 0
low: 0
norm_act_scale: 0.1
normalized_rl_action_space: false
obs_goal_horizon: 0
physics: pyb
pyb_freq: 1000
quad_type: 3
randomized_inertial_prop: false
randomized_init: false
rew_act_weight: 0.0001
rew_exponential: true
rew_state_weight: 1.0
seed: 4077
task: traj_tracking
task_info:
num_cycles: 1
proj_normal:
- 0
- 1
- 1
proj_point:
- 0
- 0
- 0.5
trajectory_plane: xz
trajectory_position_offset:
- 0
- 1
trajectory_scale: 1
trajectory_type: figure8
use_constraint_penalty: false
verbose: false
use_gpu: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
step,loss/approx_kl
10000,0.019042028230614957
20000,0.030633506896750373
30000,0.026581679276811578
40000,0.01583208901574835
50000,0.02909967950933302
60000,0.021294071866820256
70000,0.02641607148883244
80000,0.02318945480898644
90000,0.007602035258120544
100000,0.029347860710307334
110000,0.014340489992173387
120000,0.010300176194868983
130000,0.0070840485859662294
140000,0.029392864510494592
150000,0.03212166894615317
160000,0.016977267258334906
170000,0.03371633792412468
180000,0.01640852673444897
190000,0.03348114158725366
200000,0.047364376613404605
210000,0.033984803088242185
220000,0.028994489406856394
230000,0.04291667773892792
240000,0.022212137629200396
250000,0.024327125190757214
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
step,loss/entropy_loss
10000,-1.8650468170642853
20000,-1.7552332321802773
30000,-1.68878675699234
40000,-1.6304927806059522
50000,-1.5138996839523315
60000,-1.3931940853595732
70000,-1.388410900036494
80000,-1.3235636234283448
90000,-1.2646600544452666
100000,-1.2286749998728435
110000,-1.1855341215928394
120000,-1.178399835030238
130000,-1.1548114120960236
140000,-1.0869037906328838
150000,-1.0821825047334035
160000,-1.0469228804111483
170000,-0.9771186093489328
180000,-0.9463309933741886
190000,-0.98116758565108
200000,-1.0034424304962157
210000,-0.965948274731636
220000,-0.9208977440992993
230000,-0.9026378452777862
240000,-0.876814016699791
250000,-0.7816491693258286
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
step,loss/policy_loss
10000,-0.005674048569055875
20000,-0.012599202696472678
30000,0.00027775632425281973
40000,-0.0019652279915649993
50000,-0.004312192235563267
60000,-0.011835860187362487
70000,-0.007146836617574167
80000,-0.0014260768091482836
90000,-0.005977628756711357
100000,-0.007523616881241553
110000,-0.005258733822307804
120000,-0.004637148353533257
130000,-0.011222052568600717
140000,-0.003314051613958418
150000,-0.002640003469618418
160000,-0.004508422728718014
170000,-0.004660241787124491
180000,-0.0031596650172195664
190000,0.0010233485010044754
200000,0.0015237208589164585
210000,0.007902507255455686
220000,0.0009553110313142845
230000,-0.009793386831435269
240000,-0.003094141107752506
250000,-0.0027178767337847796
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
step,loss/value_loss
10000,5.2912078674909235
20000,4.862682331877378
30000,15.647721571981368
40000,9.19444863087563
50000,14.471390021750238
60000,5.1202633909702735
70000,6.919290430232465
80000,9.204161008723448
90000,11.34431719022297
100000,1.371981143216364
110000,4.36495685012213
120000,0.3911020827463573
130000,1.8652524237758008
140000,3.0497116766688506
150000,6.195875225190337
160000,2.9876203859621513
170000,0.6713922070680838
180000,2.134912566155845
190000,0.4016084477118814
200000,6.182694019625103
210000,0.4895621081164337
220000,0.5497357208378352
230000,2.353933525187545
240000,0.4196032263287692
250000,2.4522417304073882
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
step,stat/ep_constraint_violation
10000,57.0
20000,41.0
30000,28.0
40000,21.0
50000,22.0
60000,12.0
70000,4.0
80000,20.0
90000,32.0
100000,7.0
110000,14.0
120000,5.5
130000,8.5
140000,7.5
150000,16.5
160000,17.0
170000,3.0
180000,9.5
190000,9.0
200000,17.5
210000,7.5
220000,8.0
230000,9.0
240000,8.0
250000,14.0
Loading

0 comments on commit e98b837

Please sign in to comment.