Skip to content

Commit

Permalink
Adding trajectory tracking cpo
Browse files Browse the repository at this point in the history
  • Loading branch information
Federico-PizarroBejarano committed Nov 10, 2023
1 parent 1f6b51d commit 2edfd6f
Show file tree
Hide file tree
Showing 214 changed files with 13,837 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ repos:
- id: check-yaml
- id: check-toml
- id: check-added-large-files
args: ['--maxkb=10000']
args: ['--maxkb=15000']
- id: check-docstring-first
- id: check-executables-have-shebangs
- id: check-shebang-scripts-are-executable
Expand Down
12 changes: 6 additions & 6 deletions experiments/mpsc/config_overrides/cartpole/cpo_cartpole.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
algo: cpo
algo_config:
# Model args
hidden1: 32
hidden2: 32
hidden1: 64
hidden2: 64

# Optim args
discount_factor: 0.98
Expand All @@ -16,15 +16,15 @@ algo_config:
cost_d: 0.0

# Runner args
max_steps: 150
num_epochs: 2000
max_steps: 600
num_epochs: 4000
value_epochs: 100
eval_batch_size: 20

# Misc
log_interval: 50
log_interval: 40
save_interval: 0
num_checkpoints: 0
eval_interval: 50
eval_interval: 40
eval_save_best: True
tensorboard: False
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ algo_config:
cost_d: 0.0

# Runner args
max_steps: 250
num_epochs: 2000
max_steps: 1000
num_epochs: 4000
value_epochs: 100
eval_batch_size: 20

# Misc
log_interval: 50
log_interval: 40
save_interval: 0
num_checkpoints: 0
eval_interval: 50
eval_interval: 40
eval_save_best: True
tensorboard: False
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ algo_config:
cost_d: 0.0

# Runner args
max_steps: 250
max_steps: 1000
num_epochs: 4000
value_epochs: 150
eval_batch_size: 20

# Misc
log_interval: 50
log_interval: 40
save_interval: 0
num_checkpoints: 0
eval_interval: 50
eval_interval: 40
eval_save_best: True
tensorboard: False
151 changes: 151 additions & 0 deletions experiments/mpsc/models/rl_models/cartpole/track/cpo/none/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
algo: cpo
algo_config:
cost_d: 0.0
cost_v_lr: 0.0002
damping_coeff: 0.01
discount_factor: 0.98
eval_batch_size: 20
eval_interval: 40
eval_save_best: true
filter_train_actions: false
gae_coeff: 0.97
hidden1: 64
hidden2: 64
line_decay: 0.8
log_interval: 40
max_kl: 0.001
max_steps: 600
num_checkpoints: 0
num_conjugate: 10
num_epochs: 4000
penalize_sf_diff: ''
pretrained: ./models/rl_models/cartpole/track/cpo_pretrain/
save_interval: 0
tensorboard: false
training: true
use_safe_reset: ''
v_lr: 0.0002
value_epochs: 100
device: cpu
kv_overrides:
- task_config.init_state=None
- task_config.use_constraint_penalty=False
- sf_config.cost_function=one_step_cost
- sf_config.mpsc_cost_horizon=2
- sf_config.decay_factor=0.85
- sf_config.soften_constraints=True
- algo_config.filter_train_actions=False
- algo_config.use_safe_reset=
- task_config.done_on_violation=
- algo_config.penalize_sf_diff=
- algo_config.pretrained=./models/rl_models/cartpole/track/cpo_pretrain/
output_dir: ./models/rl_models/cartpole/track/cpo/none/
overrides:
- ./config_overrides/cartpole/cpo_cartpole.yaml
- ./config_overrides/cartpole/cartpole_track.yaml
- ./config_overrides/cartpole/nl_mpsc_cartpole_linear.yaml
restore: null
safety_filter: nl_mpsc
seed: 2
sf_config:
cost_function: one_step_cost
decay_factor: 0.85
horizon: 5
integration_algo: LTI
mpsc_cost_horizon: 2
n_samples: 6000
prior_info:
prior_prop: null
prior_prop_rand_info: null
randomize_prior_prop: false
q_lin:
- 0.02
- 0.001
- 10
- 0.5
r_lin:
- 0.1
slack_cost: 200
soften_constraints: true
use_terminal_set: false
warmstart: true
tag: temp
task: cartpole
task_config:
adversary_disturbance: null
adversary_disturbance_offset: 0.0
adversary_disturbance_scale: 0.01
constraint_penalty: -1
constraints:
- constrained_variable: state
constraint_form: default_constraint
lower_bounds:
- -2
- -2
- -0.16
- -1
upper_bounds:
- 2
- 2
- 0.16
- 1
- constrained_variable: input
constraint_form: default_constraint
cost: rl_reward
ctrl_freq: 15
disturbances: null
done_on_out_of_bound: true
done_on_violation: ''
episode_len_sec: 10
gui: false
inertial_prop:
cart_mass: 1
pole_length: 0.5
pole_mass: 0.1
inertial_prop_randomization_info: null
info_in_reset: true
init_state: null
init_state_randomization_info:
init_theta:
distrib: uniform
high: 0.16
low: -0.16
init_theta_dot:
distrib: uniform
high: 1
low: -1
init_x:
distrib: uniform
high: 2
low: -2
init_x_dot:
distrib: uniform
high: 2
low: -2
normalized_rl_action_space: true
obs_goal_horizon: 1
obs_wrap_angle: false
physics: pyb
pyb_freq: 750
randomized_inertial_prop: false
randomized_init: true
rew_act_weight: 0.01
rew_exponential: true
rew_state_weight:
- 1
- 0.01
- 0.01
- 0.01
seed: 42
task: traj_tracking
task_info:
num_cycles: 2
trajectory_plane: zx
trajectory_position_offset:
- 0
- 0
trajectory_scale: 1
trajectory_type: circle
use_constraint_penalty: false
verbose: false
use_gpu: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
step,loss/approx_kl
24000,0.000646044674795121
48000,0.0009329614695161581
72000,0.0009019307908602059
96000,0.000923275772947818
120000,0.0006421623402275145
144000,0.0009480011067353189
168000,0.0009906108025461435
192000,0.0009425210300832987
216000,0.0007847913657315075
240000,0.0007791825337335467
264000,0.0006357821403071284
288000,0.0009230932919308543
312000,0.0009148690733127296
336000,0.00095141347264871
360000,0.0009468137286603451
384000,0.0009762663976289332
408000,0.0006588250398635864
432000,0.0008665641071274877
456000,0.0009641050128266215
480000,0.0009798684623092413
504000,0.0009732079925015569
528000,0.0008943272405304015
552000,0.000996783492155373
576000,0.0009692904422990978
600000,0.0009578225435689092
624000,0.0007605528808198869
648000,0.0009490989032201469
672000,0.0009489719523116946
696000,0.0006397227407433093
720000,0.0009450088837184012
744000,0.0009264686959795654
768000,0.0009609815897420049
792000,0.0008726727101020515
816000,0.0006440887809731066
840000,0.0009824566077440977
864000,0.000682516663800925
888000,0.0009829368209466338
912000,0.0009785661241039634
936000,0.0006422321312129498
960000,0.000898991129361093
984000,0.0009843767620623112
1008000,0.000996031565591693
1032000,0.0006816338864155114
1056000,0.0009720880188979208
1080000,0.000980019336566329
1104000,0.0009498042054474354
1128000,0.0009569736430421472
1152000,0.0006414385861717165
1176000,0.0009859133278951049
1200000,0.0009931768290698528
1224000,0.0006413291557691991
1248000,0.0009956620633602142
1272000,0.0009710220620036125
1296000,0.0006393995136022568
1320000,0.0006447192281484604
1344000,0.0009979369351640344
1368000,0.0006416537798941135
1392000,0.0009277204517275095
1416000,0.0006415481329895556
1440000,0.0009986857185140252
1464000,0.0006506131030619144
1488000,0.000991199049167335
1512000,0.0009983122581616044
1536000,0.0009881022851914167
1560000,0.0006458890275098383
1584000,0.0009984839707612991
1608000,0.000990700558759272
1632000,0.0006463747704401612
1656000,0.000990919885225594
1680000,0.0009260903461836278
1704000,0.0006455002003349364
1728000,0.0009070246014744043
1752000,0.0009915938135236502
1776000,0.0006418480770662427
1800000,0.0009987179655581713
1824000,0.0006425059982575476
1848000,0.0009951347019523382
1872000,0.0009485813789069653
1896000,0.0009963957127183676
1920000,0.0006988556706346571
1944000,0.0009509280789643526
1968000,0.0009975767461583018
1992000,0.000639914651401341
2016000,0.0009169498225674033
2040000,0.0009927612263709307
2064000,0.0006458031130023301
2088000,0.0009963979246094823
2112000,0.0009705353295430541
2136000,0.0006517128786072135
2160000,0.0009954829001799226
2184000,0.0009975056163966656
2208000,0.0006464573089033365
2232000,3.2211271732465177e-15
2256000,0.0006424142047762871
2280000,0.0006442568264901638
2304000,0.0009942648466676474
2328000,0.0009692921885289252
2352000,0.0009863196173682809
2376000,0.0009901811135932803
2400000,0.0009780626278370619
Loading

0 comments on commit 2edfd6f

Please sign in to comment.