Skip to content

Commit

Permalink
Redoing 3D quadrotor experiments with updated parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
pizarrob committed Oct 3, 2023
1 parent e655401 commit b4b1b4a
Show file tree
Hide file tree
Showing 1,193 changed files with 126,629 additions and 44,333 deletions.
2 changes: 2 additions & 0 deletions examples/rl/train_rl_model.sh
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ python3 ../../safe_control_gym/experiments/train_rl_controller.py \
--tag unsafe_rl_temp_data/ \
--seed 2 \
--kv_overrides \
task_config.init_state=None \
task_config.randomized_init=True \
algo_config.pretrained=./models/${ALGO}/${ALGO}_pretrain_${SYS}_${TASK}.pt

# Move the newly trained unsafe model.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@ algo_config:
hidden_dim: 128

# loss args
use_gae: False
use_gae: True
entropy_coef: 0.01

# optim args
opt_epochs: 20
mini_batch_size: 250
mini_batch_size: 256
actor_lr: 0.001
critic_lr: 0.001

# runner args
max_env_steps: 500000
rollout_batch_size: 1
rollout_steps: 250
rollout_steps: 1000

# misc
log_interval: 10000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ algo_config:

# optim args
opt_epochs: 20
mini_batch_size: 250
mini_batch_size: 256
actor_lr: 0.001
critic_lr: 0.001

# runner args
max_env_steps: 500000
max_env_steps: 1000000
rollout_batch_size: 1
rollout_steps: 250
rollout_steps: 1000

# misc
log_interval: 10000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ algo_config:
entropy_lr: 0.001

# runner args
max_env_steps: 200000
max_env_steps: 400000
warm_up_steps: 1000
rollout_batch_size: 1
num_workers: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@ algo_config:

# optim args
opt_epochs: 20
mini_batch_size: 250
mini_batch_size: 256
actor_lr: 0.001
critic_lr: 0.001

# runner args
max_env_steps: 500000
max_env_steps: 1000000
rollout_batch_size: 4
rollout_steps: 250
rollout_steps: 1000

# misc
log_interval: 10000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ algo_config:

# optim args
opt_epochs: 20
mini_batch_size: 250
mini_batch_size: 256
actor_lr: 0.001
critic_lr: 0.001

# runner args
max_env_steps: 500000
max_env_steps: 1000000
rollout_batch_size: 4
rollout_steps: 250
rollout_steps: 1000

# misc
log_interval: 100
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ algo_config:
gamma: 0.99
hidden_dim: 128
log_interval: 10000
max_env_steps: 500000
max_env_steps: 1000000
max_grad_norm: 0.5
mini_batch_size: 250
mini_batch_size: 256
norm_obs: false
norm_reward: false
num_checkpoints: 0
Expand All @@ -27,7 +27,7 @@ algo_config:
penalize_sf_diff: false
pretrained: ./models/rl_models/quadrotor_3D/track/ppo_pretrain/
rollout_batch_size: 1
rollout_steps: 250
rollout_steps: 1000
save_interval: 0
sf_penalty: 300
target_kl: 0.01
Expand Down
Original file line number Diff line number Diff line change
@@ -1,51 +1,101 @@
step,loss/approx_kl
10000,-0.024717991618812007
20000,0.019025319523736827
30000,0.012382385968614828
40000,0.018175723604345606
50000,0.025319886843115036
60000,0.022644652653438957
70000,0.02934769309945402
80000,0.021463141971267773
90000,0.015215548057435057
100000,0.013580853524641157
110000,0.015955756712704883
120000,0.007872062907647337
130000,0.01588848564454821
140000,0.011611916146241085
150000,0.01886155008338406
160000,0.019081980279833034
170000,0.015559760406799605
180000,0.030328848503902606
190000,0.019286905233562003
200000,0.018541683400422393
210000,0.014527806176646863
220000,0.015362587738782097
230000,0.01431140082264335
240000,0.014773090709559965
250000,0.01864015379375772
260000,0.01400844187326733
270000,0.015993282847921363
280000,0.015338407243508812
290000,0.030352080686390438
300000,0.014774381899693979
310000,0.01537973804237538
320000,0.018770970499666716
330000,0.016250049970671476
340000,0.020058612478431305
350000,0.0207919909633695
360000,0.01598934678854418
370000,0.0147222041720525
380000,0.014809526793682082
390000,0.019211239343415974
400000,0.02510056388098745
410000,0.016799632357992245
420000,0.019818050437874747
430000,0.01678158791679891
440000,0.0142976885166485
450000,-0.007578600707324234
460000,0.028513307756744412
470000,0.01708139699129383
480000,0.013671855167738967
490000,0.011109500317310506
500000,0.014458024663850821
10000,0.019218411521675687
20000,0.026657083141617476
30000,0.01746212083380669
40000,0.009919317167562742
50000,0.02111920174987366
60000,0.01724486625753343
70000,0.01632642283414801
80000,0.015934750298038126
90000,0.0311812112107873
100000,0.02331677962404986
110000,0.03371710733820995
120000,0.030351283960044377
130000,0.03279125356736282
140000,0.013648889834682145
150000,0.023243115593989686
160000,0.018773438936720295
170000,0.018916703512271246
180000,0.020599004921192925
190000,0.02473791797334949
200000,0.030151523572082322
210000,0.0300494180371364
220000,0.022491315736745794
230000,0.02257620928188165
240000,0.0337772676255554
250000,0.02179785487242043
260000,0.024567173871522147
270000,0.023649742904429637
280000,0.03622848563827573
290000,0.032509950951983536
300000,0.025151518592610955
310000,0.03157009757123887
320000,0.022510625546177228
330000,0.02347554156246285
340000,0.03789273059616486
350000,0.018296154340108235
360000,0.024769589103137455
370000,0.03000542554073036
380000,0.027254434209316968
390000,0.019858964625746014
400000,0.021827833106120433
410000,0.024644354016830523
420000,0.024889172101393343
430000,0.025677736227711044
440000,0.017630940380816657
450000,0.017418021506940322
460000,0.03001865083351732
470000,0.03849459284295638
480000,0.027187612513080238
490000,0.02783633583846191
500000,0.030406417387227214
510000,0.02205329760909081
520000,0.017947127064689997
530000,0.015077587123960257
540000,0.028686774040882785
550000,0.023915739000464478
560000,0.03114956431090831
570000,0.03602626387340327
580000,0.03039917517453432
590000,0.031206621027862026
600000,0.022293894877657292
610000,0.028865009934330978
620000,0.02208978966809809
630000,0.028119065674642722
640000,0.021514978787551323
650000,0.026450703401739394
660000,0.03150768955238163
670000,0.016960548066223657
680000,0.026525620510801668
690000,0.03453690588163834
700000,0.01535425819456577
710000,0.0443646027551343
720000,0.028342895085612933
730000,0.031697115022689105
740000,0.020270801335573196
750000,0.02270585975299279
760000,0.029777690395712847
770000,0.01838145664272209
780000,0.018685533913473287
790000,0.024748957948759195
800000,0.025136707412699855
810000,0.020907793069879214
820000,0.0339710091240704
830000,0.03614181123363475
840000,0.037953167936454216
850000,0.023853258819629748
860000,0.02006980065877239
870000,0.03955576866865158
880000,0.03445271523669362
890000,0.021048704162240025
900000,0.004874991532415152
910000,0.027964027691632504
920000,0.029088548819224035
930000,0.01806592782959342
940000,0.02563529678930839
950000,0.021795741065094867
960000,0.029152677084008854
970000,0.027936897209535037
980000,0.03480880114560326
990000,0.02758963455756505
1000000,0.019105506129562855
Loading

0 comments on commit b4b1b4a

Please sign in to comment.