diff --git a/experiments/crazyflie/config_overrides/crazyflie_track.yaml b/experiments/crazyflie/config_overrides/crazyflie_track.yaml index 6f1b8b3c2..39f322dff 100644 --- a/experiments/crazyflie/config_overrides/crazyflie_track.yaml +++ b/experiments/crazyflie/config_overrides/crazyflie_track.yaml @@ -135,7 +135,7 @@ task_config: disturbances: observation: - disturbance_func: white_noise - std: 0.003 + std: 0.002 dynamics: - disturbance_func: white_noise - std: 0.3 + std: 0.2 diff --git a/experiments/crazyflie/crazyflie_experiment.py b/experiments/crazyflie/crazyflie_experiment.py index 83cbedbd7..aa07372ab 100644 --- a/experiments/crazyflie/crazyflie_experiment.py +++ b/experiments/crazyflie/crazyflie_experiment.py @@ -217,7 +217,8 @@ def run(gui=False, plot=False, training=False, certify=True, curr_path='.', num_ def get_reward(obs, info, traj): wp_idx = min(info['current_step']//20, traj.shape[0] - 1) # +1 because state has already advanced but counter not incremented. state_error = obs[:4] - traj[wp_idx] - dist = np.sum(np.array([2, 0, 2, 0]) * state_error * state_error) + dist = np.sum(np.array([5, 0, 5, 0]) * state_error * state_error) + dist += np.sum(obs[[1,3,6,7]]*obs[[1,3,6,7]]) rew = -dist rew = np.exp(rew) diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/config.yaml b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/config.yaml new file mode 100644 index 000000000..6880e5da1 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/config.yaml @@ -0,0 +1,246 @@ +algo: ppo +algo_config: + activation: tanh + actor_lr: 0.001 + clip_obs: 10 + clip_param: 0.2 + clip_reward: 10 + critic_lr: 0.001 + deque_size: 10 + entropy_coef: 0.01 + eval_batch_size: 10 + eval_interval: 10000 + eval_save_best: true + filter_train_actions: true + gae_lambda: 0.95 + gamma: 0.99 + hidden_dim: 128 + log_interval: 10000 + max_env_steps: 250000 + max_grad_norm: 0.5 + mini_batch_size: 256 + norm_obs: false + norm_reward: false + num_checkpoints: 0 + num_workers: 1 + opt_epochs: 20 + penalize_sf_diff: true + rollout_batch_size: 1 + rollout_steps: 1000 + save_interval: 0 + sf_penalty: 0.1 + target_kl: 0.01 + tensorboard: false + training: true + use_clipped_value: false + use_gae: true + use_safe_reset: true +device: cpu +kv_overrides: +- sf_config.cost_function=one_step_cost +- algo_config.filter_train_actions=True +- algo_config.penalize_sf_diff=True +- algo_config.use_safe_reset=True +- algo_config.sf_penalty=0.1 +- task_config.use_constraint_penalty=False +output_dir: ./models/rl_models/ppo/mpsf_0.1_dm_t2 +overrides: +- ./config_overrides/crazyflie_track.yaml +- ./config_overrides/ppo_crazyflie.yaml +- ./config_overrides/nl_mpsc.yaml +restore: null +safety_filter: nl_mpsc +seed: null +sf_config: + cost_function: one_step_cost + decay_factor: 0.85 + horizon: 10 + integration_algo: rk4 + mpsc_cost_horizon: 5 + n_samples: 600 + prior_info: + prior_prop: null + prior_prop_rand_info: null + randomize_prior_prop: false + q_lin: + - 0.008 + - 1.85 + - 0.008 + - 1.85 + - 10 + - 10 + r_lin: + - 2 + use_acados: true + use_terminal_set: false + warmstart: true +tag: temp +task: quadrotor +task_config: + adversary_disturbance: null + adversary_disturbance_offset: 0.0 + adversary_disturbance_scale: 0.01 + camera_view: + - 5 + - -40 + - -40 + - 0.5 + - -1 + - 0.5 + constraint_penalty: -1 + constraints: + - active_dims: + - 0 + - 1 + - 2 + - 3 + - 6 + - 7 + constrained_variable: state + constraint_form: bounded_constraint + lower_bounds: + - -0.95 + - -2 + - -0.95 + - -2 + - -0.25 + - -0.25 + upper_bounds: + - 0.95 + - 2 + - 0.95 + - 2 + - 0.25 + - 0.25 + - constrained_variable: input + constraint_form: default_constraint + cost: quadratic + ctrl_freq: 500 + disturbances: + dynamics: + - disturbance_func: white_noise + std: 0.2 + observation: + - disturbance_func: white_noise + std: 0.002 + done_on_out_of_bound: true + done_on_violation: false + episode_len_sec: 15 + gui: false + inertial_prop: + Ixx: 1.4e-05 + Iyy: 1.4e-05 + Izz: 2.17e-05 + M: 0.0345 + inertial_prop_randomization_info: + Ixx: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Iyy: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Izz: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + M: + distrib: uniform + high: 0.0025 + low: -0.0025 + info_in_reset: true + init_state: + init_p: 0 + init_phi: 0 + init_psi: 0 + init_q: 0 + init_r: 0 + init_theta: 0 + init_x: 0 + init_x_dot: 0 + init_y: 0 + init_y_dot: 0 + init_z: 1 + init_z_dot: 0 + init_state_randomization_info: + init_p: + distrib: uniform + high: 0.5 + low: -0.5 + init_phi: + distrib: uniform + high: 0.25 + low: -0.25 + init_psi: + distrib: uniform + high: 0 + low: 0 + init_q: + distrib: uniform + high: 0.5 + low: -0.5 + init_r: + distrib: uniform + high: 0 + low: 0 + init_theta: + distrib: uniform + high: 0.25 + low: -0.25 + init_x: + distrib: uniform + high: 0.95 + low: -0.95 + init_x_dot: + distrib: uniform + high: 2 + low: -2 + init_y: + distrib: uniform + high: 0.95 + low: -0.95 + init_y_dot: + distrib: uniform + high: 2 + low: -2 + init_z: + distrib: uniform + high: 1 + low: 1 + init_z_dot: + distrib: uniform + high: 0 + low: 0 + norm_act_scale: 0.1 + normalized_rl_action_space: false + obs_goal_horizon: 0 + physics: pyb + pyb_freq: 1000 + quad_type: 3 + randomized_inertial_prop: true + randomized_init: false + rew_act_weight: 0.0001 + rew_exponential: true + rew_state_weight: 1.0 + seed: 1337 + task: traj_tracking + task_info: + num_cycles: 1 + proj_normal: + - 0 + - 1 + - 1 + proj_point: + - 0 + - 0 + - 0.5 + trajectory_plane: xz + trajectory_position_offset: + - 0 + - 1 + trajectory_scale: 1 + trajectory_type: figure8 + use_constraint_penalty: false + verbose: false +use_gpu: false diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/loss/approx_kl.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/loss/approx_kl.log new file mode 100644 index 000000000..c7e704137 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/loss/approx_kl.log @@ -0,0 +1,26 @@ +step,loss/approx_kl +10000,0.03629405741036559 +20000,0.014158800469400984 +30000,0.00940396550189083 +40000,0.01859933050194134 +50000,0.023677004485701522 +60000,0.013056748282785219 +70000,0.010825660487171263 +80000,0.010031700010101001 +90000,0.008447419452325751 +100000,0.0097706533735618 +110000,0.01536899118218571 +120000,0.02015554302682479 +130000,0.009140072914306073 +140000,0.01214529280550778 +150000,0.014896025857888162 +160000,0.014774406371482956 +170000,0.03351424749319752 +180000,0.020932470044742024 +190000,0.010564974047398817 +200000,0.011176506569609047 +210000,0.025748851185198873 +220000,0.013600206406166156 +230000,0.019517522285847612 +240000,0.02317161182872951 +250000,0.017965512090207386 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/loss/entropy_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/loss/entropy_loss.log new file mode 100644 index 000000000..b16613263 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/loss/entropy_loss.log @@ -0,0 +1,26 @@ +step,loss/entropy_loss +10000,-1.860471185048421 +20000,-1.8969107687473297 +30000,-1.9668918271859486 +40000,-1.9282902201016738 +50000,-1.9003465255101524 +60000,-1.9130737761656444 +70000,-1.9386168360710143 +80000,-1.9204576333363854 +90000,-1.801043504476547 +100000,-1.6914923290411632 +110000,-1.644024852911631 +120000,-1.6197641968727112 +130000,-1.6847192347049713 +140000,-1.6943771878878278 +150000,-1.7249806304772695 +160000,-1.7251652995745341 +170000,-1.6844193081061043 +180000,-1.6953525960445404 +190000,-1.667886586983999 +200000,-1.699438379208247 +210000,-1.6885322888692222 +220000,-1.6554879188537597 +230000,-1.5087747514247893 +240000,-1.5377500136693318 +250000,-1.4668552458286286 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/loss/policy_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/loss/policy_loss.log new file mode 100644 index 000000000..d49027636 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/loss/policy_loss.log @@ -0,0 +1,26 @@ +step,loss/policy_loss +10000,-0.0046946849570197536 +20000,-0.00837196960013564 +30000,-0.009198114681110026 +40000,-0.01203073263087373 +50000,-0.006172231929076494 +60000,-0.010109381468762262 +70000,-0.0020931236373119533 +80000,-0.018488799211649386 +90000,-0.0031121322207710933 +100000,-0.003763172075328054 +110000,-0.008179799615605495 +120000,-0.008566143801081099 +130000,-0.019973077587651978 +140000,-0.005292802399177931 +150000,-0.009406493863578218 +160000,-0.008272437623207462 +170000,-0.01026959637728307 +180000,-0.011772444065824148 +190000,-0.008092452344120659 +200000,-0.004798340739460206 +210000,-0.0009689785466268991 +220000,-0.018449735918894934 +230000,-0.009661653641431922 +240000,-0.0014209499504723388 +250000,-0.006043377444478433 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/loss/value_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/loss/value_loss.log new file mode 100644 index 000000000..289e6251d --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/loss/value_loss.log @@ -0,0 +1,26 @@ +step,loss/value_loss +10000,0.8204153413536313 +20000,1.6596146752059808 +30000,0.5423766622529854 +40000,0.6714325899721156 +50000,1.6261081726258937 +60000,1.7673258683733022 +70000,1.7415680847180919 +80000,1.7207232450278585 +90000,2.746758267532083 +100000,3.4835939802467073 +110000,2.4956888027752315 +120000,0.978582906681362 +130000,1.413058683637044 +140000,1.6709326064324324 +150000,0.8909431496528339 +160000,1.4910106884557064 +170000,0.6128571866074364 +180000,2.394677497472828 +190000,0.39955144068614346 +200000,1.6228412898725524 +210000,0.8637223169056474 +220000,0.23593890865875858 +230000,0.7480453868748183 +240000,2.229012933263951 +250000,9.441217708943952 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat/ep_constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat/ep_constraint_violation.log new file mode 100644 index 000000000..921fb7766 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat/ep_constraint_violation.log @@ -0,0 +1,26 @@ +step,stat/ep_constraint_violation +10000,203.0 +20000,422.0 +30000,280.5 +40000,363.0 +50000,336.0 +60000,345.5 +70000,369.5 +80000,170.5 +90000,90.0 +100000,125.5 +110000,98.5 +120000,62.5 +130000,48.5 +140000,70.0 +150000,64.0 +160000,68.5 +170000,40.0 +180000,49.5 +190000,41.0 +200000,24.0 +210000,30.5 +220000,12.5 +230000,3.0 +240000,24.0 +250000,113.5 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat/ep_length.log new file mode 100644 index 000000000..485c09e10 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat/ep_length.log @@ -0,0 +1,26 @@ +step,stat/ep_length +10000,375.0 +20000,375.0 +30000,375.0 +40000,375.0 +50000,375.0 +60000,375.0 +70000,375.0 +80000,375.0 +90000,375.0 +100000,375.0 +110000,375.0 +120000,375.0 +130000,375.0 +140000,375.0 +150000,375.0 +160000,375.0 +170000,375.0 +180000,375.0 +190000,375.0 +200000,375.0 +210000,375.0 +220000,375.0 +230000,375.0 +240000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat/ep_return.log new file mode 100644 index 000000000..b542744d2 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat/ep_return.log @@ -0,0 +1,26 @@ +step,stat/ep_return +10000,23.264366629008652 +20000,18.38791166831524 +30000,25.700775008106543 +40000,26.690523073681256 +50000,29.873756677366842 +60000,37.32913450024621 +70000,31.903863806724377 +80000,55.901466752903104 +90000,66.01495776891869 +100000,86.5448101198637 +110000,76.49554169758018 +120000,80.87524181284832 +130000,84.50563483908095 +140000,79.76230475398836 +150000,86.5639255786366 +160000,81.216761662187 +170000,88.88004138349055 +180000,83.37462742709191 +190000,92.80979317592951 +200000,102.68921226306296 +210000,94.87939693792428 +220000,100.54204219999727 +230000,101.2981578863523 +240000,99.6266289113471 +250000,92.07602435923229 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat/ep_reward.log new file mode 100644 index 000000000..21d2cdcf9 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat/ep_reward.log @@ -0,0 +1,26 @@ +step,stat/ep_reward +10000,0.06203831101068974 +20000,0.0490344311155073 +30000,0.06853540002161745 +40000,0.07117472819648335 +50000,0.07966335113964491 +60000,0.09954435866732322 +70000,0.085076970151265 +80000,0.1490705780077416 +90000,0.17603988738378318 +100000,0.23078616031963653 +110000,0.20398811119354715 +120000,0.21566731150092883 +130000,0.22534835957088256 +140000,0.21269947934396896 +150000,0.23083713487636429 +160000,0.21657803109916532 +170000,0.23701344368930813 +180000,0.22233233980557843 +190000,0.2474927818024787 +200000,0.2738378993681679 +210000,0.25301172516779813 +220000,0.268112112533326 +230000,0.2701284210302728 +240000,0.2656710104302589 +250000,0.2455360649579528 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat_eval/constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat_eval/constraint_violation.log new file mode 100644 index 000000000..6e4d7c90a --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat_eval/constraint_violation.log @@ -0,0 +1,26 @@ +step,stat_eval/constraint_violation +10000,449.2 +20000,377.3 +30000,379.0 +40000,421.1 +50000,383.5 +60000,359.7 +70000,221.6 +80000,247.9 +90000,121.1 +100000,88.9 +110000,93.2 +120000,75.0 +130000,64.7 +140000,49.4 +150000,54.6 +160000,57.3 +170000,60.3 +180000,33.9 +190000,29.3 +200000,30.3 +210000,22.1 +220000,10.5 +230000,4.8 +240000,1.8 +250000,4.7 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat_eval/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat_eval/ep_length.log new file mode 100644 index 000000000..d1490483d --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat_eval/ep_length.log @@ -0,0 +1,26 @@ +step,stat_eval/ep_length +10000,375.0 +20000,375.0 +30000,375.0 +40000,375.0 +50000,375.0 +60000,375.0 +70000,375.0 +80000,375.0 +90000,375.0 +100000,375.0 +110000,375.0 +120000,375.0 +130000,375.0 +140000,375.0 +150000,375.0 +160000,375.0 +170000,375.0 +180000,375.0 +190000,375.0 +200000,375.0 +210000,375.0 +220000,375.0 +230000,375.0 +240000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat_eval/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat_eval/ep_return.log new file mode 100644 index 000000000..5f73df4b8 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat_eval/ep_return.log @@ -0,0 +1,26 @@ +step,stat_eval/ep_return +10000,14.084603288503288 +20000,20.429731156657972 +30000,26.99837687113567 +40000,27.84234353673661 +50000,29.6708009378688 +60000,36.39426614933185 +70000,48.83658842868503 +80000,57.76452166503873 +90000,77.95878381077266 +100000,82.39028628975414 +110000,82.79192693974866 +120000,78.09598702961857 +130000,82.34081423003332 +140000,89.60296650682459 +150000,88.92773463180671 +160000,91.70148455357831 +170000,88.17556763887976 +180000,95.92661746400731 +190000,100.47953157432985 +200000,100.8392547098346 +210000,100.21489542102202 +220000,109.42574689422875 +230000,107.31043101499645 +240000,112.65786599727414 +250000,115.70921975487033 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat_eval/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat_eval/ep_reward.log new file mode 100644 index 000000000..8d61a2268 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat_eval/ep_reward.log @@ -0,0 +1,26 @@ +step,stat_eval/ep_reward +10000,0.037558942102675436 +20000,0.05447928308442127 +30000,0.07199567165636178 +40000,0.07424624943129762 +50000,0.07912213583431682 +60000,0.09705137639821826 +70000,0.13023090247649344 +80000,0.1540387244401033 +90000,0.20789009016206045 +100000,0.21970743010601104 +110000,0.22077847183932975 +120000,0.20825596541231617 +130000,0.21957550461342215 +140000,0.2389412440181989 +150000,0.23714062568481795 +160000,0.2445372921428755 +170000,0.2351348470370127 +180000,0.2558043132373528 +190000,0.26794541753154627 +200000,0.26890467922622563 +210000,0.26723972112272537 +220000,0.29180199171794324 +230000,0.2861611493733239 +240000,0.30042097599273104 +250000,0.3085579193463208 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat_eval/mse.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat_eval/mse.log new file mode 100644 index 000000000..d0f64ffd4 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/logs/stat_eval/mse.log @@ -0,0 +1,26 @@ +step,stat_eval/mse +10000,5.449300775747645 +20000,4.978188709589359 +30000,3.85637926456429 +40000,5.026633822975743 +50000,4.443733814781956 +60000,4.7435069675862085 +70000,3.24473203755511 +80000,3.3840670690800287 +90000,2.544029254777595 +100000,1.9860024148928666 +110000,1.4553180767131595 +120000,1.5729647818631713 +130000,1.489725710727132 +140000,1.219335390252749 +150000,0.9140129515804041 +160000,1.154214973996024 +170000,0.9325891712844833 +180000,0.8950459262393003 +190000,0.7472245522198772 +200000,0.7141174725063044 +210000,0.7125758754027411 +220000,0.6350264071237407 +230000,0.6605859068019939 +240000,0.589495514255225 +250000,0.5355500417771413 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/model_best.pt b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/model_best.pt new file mode 100644 index 000000000..461d7e8c8 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/model_best.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/model_latest.pt b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/model_latest.pt new file mode 100644 index 000000000..abcfc5c38 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/model_latest.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-loss-approx_kl.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-loss-approx_kl.jpg new file mode 100644 index 000000000..cc40558e5 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-loss-approx_kl.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-loss-entropy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-loss-entropy_loss.jpg new file mode 100644 index 000000000..f6c246891 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-loss-policy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-loss-policy_loss.jpg new file mode 100644 index 000000000..5942bdd06 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-loss-policy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-loss-value_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-loss-value_loss.jpg new file mode 100644 index 000000000..293fe700e Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-loss-value_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat-ep_constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat-ep_constraint_violation.jpg new file mode 100644 index 000000000..cc60b7f29 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat-ep_length.jpg new file mode 100644 index 000000000..885959143 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat-ep_return.jpg new file mode 100644 index 000000000..987599515 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat-ep_reward.jpg new file mode 100644 index 000000000..f12fd5b0e Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat_eval-constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat_eval-constraint_violation.jpg new file mode 100644 index 000000000..5595549f5 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat_eval-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat_eval-ep_length.jpg new file mode 100644 index 000000000..f90cfa9c4 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat_eval-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat_eval-ep_return.jpg new file mode 100644 index 000000000..4dfb44e6c Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat_eval-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat_eval-ep_reward.jpg new file mode 100644 index 000000000..df6d83b11 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat_eval-mse.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat_eval-mse.jpg new file mode 100644 index 000000000..1154d2ddb Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t2/plots/-stat_eval-mse.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/config.yaml b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/config.yaml new file mode 100644 index 000000000..4a08b2a32 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/config.yaml @@ -0,0 +1,246 @@ +algo: ppo +algo_config: + activation: tanh + actor_lr: 0.001 + clip_obs: 10 + clip_param: 0.2 + clip_reward: 10 + critic_lr: 0.001 + deque_size: 10 + entropy_coef: 0.01 + eval_batch_size: 10 + eval_interval: 10000 + eval_save_best: true + filter_train_actions: true + gae_lambda: 0.95 + gamma: 0.99 + hidden_dim: 128 + log_interval: 10000 + max_env_steps: 250000 + max_grad_norm: 0.5 + mini_batch_size: 256 + norm_obs: false + norm_reward: false + num_checkpoints: 0 + num_workers: 1 + opt_epochs: 20 + penalize_sf_diff: true + rollout_batch_size: 1 + rollout_steps: 1000 + save_interval: 0 + sf_penalty: 10 + target_kl: 0.01 + tensorboard: false + training: true + use_clipped_value: false + use_gae: true + use_safe_reset: true +device: cpu +kv_overrides: +- sf_config.cost_function=one_step_cost +- algo_config.filter_train_actions=True +- algo_config.penalize_sf_diff=True +- algo_config.use_safe_reset=True +- algo_config.sf_penalty=10 +- task_config.use_constraint_penalty=False +output_dir: ./models/rl_models/ppo/mpsf_10_dm_t2 +overrides: +- ./config_overrides/crazyflie_track.yaml +- ./config_overrides/ppo_crazyflie.yaml +- ./config_overrides/nl_mpsc.yaml +restore: null +safety_filter: nl_mpsc +seed: null +sf_config: + cost_function: one_step_cost + decay_factor: 0.85 + horizon: 10 + integration_algo: rk4 + mpsc_cost_horizon: 5 + n_samples: 600 + prior_info: + prior_prop: null + prior_prop_rand_info: null + randomize_prior_prop: false + q_lin: + - 0.008 + - 1.85 + - 0.008 + - 1.85 + - 10 + - 10 + r_lin: + - 2 + use_acados: true + use_terminal_set: false + warmstart: true +tag: temp +task: quadrotor +task_config: + adversary_disturbance: null + adversary_disturbance_offset: 0.0 + adversary_disturbance_scale: 0.01 + camera_view: + - 5 + - -40 + - -40 + - 0.5 + - -1 + - 0.5 + constraint_penalty: -1 + constraints: + - active_dims: + - 0 + - 1 + - 2 + - 3 + - 6 + - 7 + constrained_variable: state + constraint_form: bounded_constraint + lower_bounds: + - -0.95 + - -2 + - -0.95 + - -2 + - -0.25 + - -0.25 + upper_bounds: + - 0.95 + - 2 + - 0.95 + - 2 + - 0.25 + - 0.25 + - constrained_variable: input + constraint_form: default_constraint + cost: quadratic + ctrl_freq: 500 + disturbances: + dynamics: + - disturbance_func: white_noise + std: 0.2 + observation: + - disturbance_func: white_noise + std: 0.002 + done_on_out_of_bound: true + done_on_violation: false + episode_len_sec: 15 + gui: false + inertial_prop: + Ixx: 1.4e-05 + Iyy: 1.4e-05 + Izz: 2.17e-05 + M: 0.0345 + inertial_prop_randomization_info: + Ixx: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Iyy: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Izz: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + M: + distrib: uniform + high: 0.0025 + low: -0.0025 + info_in_reset: true + init_state: + init_p: 0 + init_phi: 0 + init_psi: 0 + init_q: 0 + init_r: 0 + init_theta: 0 + init_x: 0 + init_x_dot: 0 + init_y: 0 + init_y_dot: 0 + init_z: 1 + init_z_dot: 0 + init_state_randomization_info: + init_p: + distrib: uniform + high: 0.5 + low: -0.5 + init_phi: + distrib: uniform + high: 0.25 + low: -0.25 + init_psi: + distrib: uniform + high: 0 + low: 0 + init_q: + distrib: uniform + high: 0.5 + low: -0.5 + init_r: + distrib: uniform + high: 0 + low: 0 + init_theta: + distrib: uniform + high: 0.25 + low: -0.25 + init_x: + distrib: uniform + high: 0.95 + low: -0.95 + init_x_dot: + distrib: uniform + high: 2 + low: -2 + init_y: + distrib: uniform + high: 0.95 + low: -0.95 + init_y_dot: + distrib: uniform + high: 2 + low: -2 + init_z: + distrib: uniform + high: 1 + low: 1 + init_z_dot: + distrib: uniform + high: 0 + low: 0 + norm_act_scale: 0.1 + normalized_rl_action_space: false + obs_goal_horizon: 0 + physics: pyb + pyb_freq: 1000 + quad_type: 3 + randomized_inertial_prop: true + randomized_init: false + rew_act_weight: 0.0001 + rew_exponential: true + rew_state_weight: 1.0 + seed: 1337 + task: traj_tracking + task_info: + num_cycles: 1 + proj_normal: + - 0 + - 1 + - 1 + proj_point: + - 0 + - 0 + - 0.5 + trajectory_plane: xz + trajectory_position_offset: + - 0 + - 1 + trajectory_scale: 1 + trajectory_type: figure8 + use_constraint_penalty: false + verbose: false +use_gpu: false diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/loss/approx_kl.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/loss/approx_kl.log new file mode 100644 index 000000000..3396e902d --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/loss/approx_kl.log @@ -0,0 +1,26 @@ +step,loss/approx_kl +10000,0.017162839389250922 +20000,0.008153320542381454 +30000,0.02806174752380078 +40000,0.015363225321440646 +50000,0.02499766650920113 +60000,0.030138129818563657 +70000,0.017589707983036832 +80000,0.017219546080256505 +90000,0.0449757121425743 +100000,0.01703144966935118 +110000,0.013713395277348656 +120000,0.016947500570677218 +130000,0.014263831044081595 +140000,0.011412945746754609 +150000,0.013357484767523905 +160000,0.03682679029880092 +170000,0.021361129207070916 +180000,0.02028621436523584 +190000,0.013765138341113925 +200000,0.009986901574302466 +210000,0.01607520234732268 +220000,0.03097562970360741 +230000,0.015259000111836937 +240000,0.0160499936377164 +250000,0.01604182083004465 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/loss/entropy_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/loss/entropy_loss.log new file mode 100644 index 000000000..31aa8b068 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/loss/entropy_loss.log @@ -0,0 +1,26 @@ +step,loss/entropy_loss +10000,-1.8336105366547906 +20000,-1.7757686992486321 +30000,-1.683957004547119 +40000,-1.707256229718526 +50000,-1.7490935067335762 +60000,-1.711079136530558 +70000,-1.580990078051885 +80000,-1.5220090866088865 +90000,-1.4317609012126922 +100000,-1.3399026592572532 +110000,-1.37444961865743 +120000,-1.3268950502077739 +130000,-1.3286557157834369 +140000,-1.298214222987493 +150000,-1.219246439139048 +160000,-1.156877585252126 +170000,-1.1225961128870647 +180000,-1.0375946899255113 +190000,-1.0393990973631542 +200000,-1.0057903816302616 +210000,-0.9843683997790018 +220000,-0.9866093814373018 +230000,-0.9436851819356281 +240000,-0.946398800611496 +250000,-0.9181661238272986 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/loss/policy_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/loss/policy_loss.log new file mode 100644 index 000000000..c0cf4e1b3 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/loss/policy_loss.log @@ -0,0 +1,26 @@ +step,loss/policy_loss +10000,-0.009273230458137537 +20000,-0.010478399282284492 +30000,-0.007386054067057561 +40000,-0.006342114762513967 +50000,-0.004209828186681281 +60000,-0.009396415083989109 +70000,-0.0037700160978093775 +80000,0.0013291434727950966 +90000,-0.004904639870460919 +100000,-0.003464975929899043 +110000,-0.003501122980372185 +120000,0.0025758107413341234 +130000,-0.002970630593603114 +140000,-0.0035794133941950807 +150000,0.003837495189115885 +160000,-0.00493401366969076 +170000,-0.0013634561401874063 +180000,-0.001727834821713552 +190000,-0.0021600096691645693 +200000,-0.007440105718491583 +210000,-0.009821675936413577 +220000,-0.006228449193608233 +230000,-0.011736688739994828 +240000,-0.0037118799719790133 +250000,0.0005208627547257391 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/loss/value_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/loss/value_loss.log new file mode 100644 index 000000000..f721bee44 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/loss/value_loss.log @@ -0,0 +1,26 @@ +step,loss/value_loss +10000,0.9580231659044989 +20000,2.7967759754005836 +30000,1.9336452208241934 +40000,6.540552951246363 +50000,2.964832167621519 +60000,1.2649487286166867 +70000,0.6726866202000338 +80000,0.9265781716257606 +90000,1.1001698022165227 +100000,0.26401259571103275 +110000,0.3674949904876793 +120000,0.20808330208258438 +130000,0.23075740152828844 +140000,0.9426982774580968 +150000,0.19242678253483375 +160000,2.130547205016724 +170000,6.349442859256202 +180000,2.3904555634901694 +190000,0.26139480643060126 +200000,0.2294311008288561 +210000,0.10819160154607514 +220000,9.766333713203938 +230000,0.33365743277362886 +240000,0.2069441170229335 +250000,0.32696678112749117 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat/ep_constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat/ep_constraint_violation.log new file mode 100644 index 000000000..eb7954291 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat/ep_constraint_violation.log @@ -0,0 +1,26 @@ +step,stat/ep_constraint_violation +10000,242.5 +20000,195.0 +30000,176.0 +40000,86.0 +50000,30.5 +60000,3.0 +70000,1.5 +80000,0.0 +90000,18.0 +100000,5.0 +110000,1.5 +120000,1.0 +130000,5.5 +140000,4.5 +150000,1.0 +160000,15.0 +170000,0.0 +180000,17.5 +190000,6.0 +200000,6.0 +210000,5.5 +220000,94.5 +230000,9.0 +240000,11.5 +250000,13.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat/ep_length.log new file mode 100644 index 000000000..485c09e10 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat/ep_length.log @@ -0,0 +1,26 @@ +step,stat/ep_length +10000,375.0 +20000,375.0 +30000,375.0 +40000,375.0 +50000,375.0 +60000,375.0 +70000,375.0 +80000,375.0 +90000,375.0 +100000,375.0 +110000,375.0 +120000,375.0 +130000,375.0 +140000,375.0 +150000,375.0 +160000,375.0 +170000,375.0 +180000,375.0 +190000,375.0 +200000,375.0 +210000,375.0 +220000,375.0 +230000,375.0 +240000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat/ep_return.log new file mode 100644 index 000000000..3a3596055 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat/ep_return.log @@ -0,0 +1,26 @@ +step,stat/ep_return +10000,23.23021954273309 +20000,28.968820790941095 +30000,26.465572177125036 +40000,62.26463967209579 +50000,52.83971626304944 +60000,88.64615772882328 +70000,95.46926200873133 +80000,98.48514284524826 +90000,97.35368269634671 +100000,101.9212630986857 +110000,106.86393491604954 +120000,104.70944917128655 +130000,105.01326217399702 +140000,104.765492762955 +150000,107.19658164713178 +160000,98.74114138725591 +170000,108.58046914973335 +180000,106.29890210822622 +190000,108.71055897615537 +200000,105.75874422505152 +210000,113.45922878532959 +220000,96.7144454694176 +230000,108.64911499219119 +240000,111.36346141928198 +250000,111.70896190325638 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat/ep_reward.log new file mode 100644 index 000000000..ac89b1fc4 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat/ep_reward.log @@ -0,0 +1,26 @@ +step,stat/ep_reward +10000,0.0619472521139549 +20000,0.07725018877584292 +30000,0.07057485913900009 +40000,0.16603903912558876 +50000,0.14090591003479852 +60000,0.2363897539435288 +70000,0.2545846986899502 +80000,0.2626270475873287 +90000,0.2596098205235912 +100000,0.27179003492982856 +110000,0.28497049310946543 +120000,0.27922519779009747 +130000,0.28003536579732535 +140000,0.27937464736788 +150000,0.2858575510590181 +160000,0.2633097103660158 +170000,0.28954791773262223 +180000,0.2834637389552699 +190000,0.2898948239364143 +200000,0.2820233179334707 +210000,0.30255794342754555 +220000,0.2579051879184469 +230000,0.28973097331250985 +240000,0.2969692304514186 +250000,0.29789056507535033 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat_eval/constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat_eval/constraint_violation.log new file mode 100644 index 000000000..dcebd078f --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat_eval/constraint_violation.log @@ -0,0 +1,26 @@ +step,stat_eval/constraint_violation +10000,187.3 +20000,133.6 +30000,153.5 +40000,12.8 +50000,0.2 +60000,0.4 +70000,1.4 +80000,1.1 +90000,0.8 +100000,1.9 +110000,0.6 +120000,3.7 +130000,0.6 +140000,1.5 +150000,1.5 +160000,1.1 +170000,3.6 +180000,5.5 +190000,3.3 +200000,3.0 +210000,3.5 +220000,0.6 +230000,3.7 +240000,4.3 +250000,3.4 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat_eval/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat_eval/ep_length.log new file mode 100644 index 000000000..d1490483d --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat_eval/ep_length.log @@ -0,0 +1,26 @@ +step,stat_eval/ep_length +10000,375.0 +20000,375.0 +30000,375.0 +40000,375.0 +50000,375.0 +60000,375.0 +70000,375.0 +80000,375.0 +90000,375.0 +100000,375.0 +110000,375.0 +120000,375.0 +130000,375.0 +140000,375.0 +150000,375.0 +160000,375.0 +170000,375.0 +180000,375.0 +190000,375.0 +200000,375.0 +210000,375.0 +220000,375.0 +230000,375.0 +240000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat_eval/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat_eval/ep_return.log new file mode 100644 index 000000000..9b791bb96 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat_eval/ep_return.log @@ -0,0 +1,26 @@ +step,stat_eval/ep_return +10000,31.877683457577568 +20000,39.95974273453135 +30000,48.18816467576372 +40000,74.63103820860779 +50000,94.41145070600388 +60000,93.1422956577442 +70000,105.05618041846878 +80000,105.66136850741145 +90000,109.56417148299352 +100000,109.31838123424959 +110000,110.63469309421775 +120000,114.32905204276699 +130000,110.49602476210183 +140000,108.77043084901945 +150000,110.99979639458306 +160000,114.94356156881103 +170000,114.041900202992 +180000,115.18489866526723 +190000,113.14345474656253 +200000,111.71315162005166 +210000,117.53982167273102 +220000,113.96781458572339 +230000,113.45433027256097 +240000,115.42547115923207 +250000,115.99191869358542 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat_eval/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat_eval/ep_reward.log new file mode 100644 index 000000000..86f18bc74 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat_eval/ep_reward.log @@ -0,0 +1,26 @@ +step,stat_eval/ep_reward +10000,0.08500715588687353 +20000,0.10655931395875026 +30000,0.12850177246870326 +40000,0.19901610188962077 +50000,0.25176386854934363 +60000,0.24837945508731782 +70000,0.28014981444925013 +80000,0.28176364935309717 +90000,0.2921711239546494 +100000,0.29151568329133226 +110000,0.29502584825124734 +120000,0.3048774721140453 +130000,0.29465606603227146 +140000,0.29005448226405184 +150000,0.29599945705222147 +160000,0.3065161641834961 +170000,0.30411173387464535 +180000,0.30715972977404593 +190000,0.30171587932416666 +200000,0.29790173765347105 +210000,0.31343952446061607 +220000,0.3039141722285957 +230000,0.3025448807268292 +240000,0.3078012564246188 +250000,0.3093117831828944 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat_eval/mse.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat_eval/mse.log new file mode 100644 index 000000000..2ceec6ab7 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/logs/stat_eval/mse.log @@ -0,0 +1,26 @@ +step,stat_eval/mse +10000,5.358106273067775 +20000,4.824056070691449 +30000,4.069727859078167 +40000,2.052107124565309 +50000,0.8932812996752221 +60000,0.8924725221182873 +70000,0.8036518255378983 +80000,0.9092041820917757 +90000,0.596479986777514 +100000,0.5977651621556321 +110000,0.6689313140296042 +120000,0.569205113505319 +130000,0.7080948332538541 +140000,0.7198333847414115 +150000,0.6815316435178503 +160000,0.6445081056303883 +170000,0.5931300552827913 +180000,0.6171726339287528 +190000,0.6962594789691714 +200000,0.738853322963067 +210000,0.6787931997064593 +220000,0.7365780607175291 +230000,0.7027435684588335 +240000,0.6443063463688088 +250000,0.5699944921186665 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/model_best.pt b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/model_best.pt new file mode 100644 index 000000000..27e0a0ebb Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/model_best.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/model_latest.pt b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/model_latest.pt new file mode 100644 index 000000000..6fffa1c6b Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/model_latest.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-loss-approx_kl.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-loss-approx_kl.jpg new file mode 100644 index 000000000..0cda372f9 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-loss-approx_kl.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-loss-entropy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-loss-entropy_loss.jpg new file mode 100644 index 000000000..41e1733bf Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-loss-policy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-loss-policy_loss.jpg new file mode 100644 index 000000000..1be220832 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-loss-policy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-loss-value_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-loss-value_loss.jpg new file mode 100644 index 000000000..d08751699 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-loss-value_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat-ep_constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat-ep_constraint_violation.jpg new file mode 100644 index 000000000..61d64a309 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat-ep_length.jpg new file mode 100644 index 000000000..885959143 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat-ep_return.jpg new file mode 100644 index 000000000..62fb26651 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat-ep_reward.jpg new file mode 100644 index 000000000..cb6e4e9dc Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat_eval-constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat_eval-constraint_violation.jpg new file mode 100644 index 000000000..5fabbb718 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat_eval-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat_eval-ep_length.jpg new file mode 100644 index 000000000..f90cfa9c4 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat_eval-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat_eval-ep_return.jpg new file mode 100644 index 000000000..e4fa8ffef Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat_eval-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat_eval-ep_reward.jpg new file mode 100644 index 000000000..468993edc Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat_eval-mse.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat_eval-mse.jpg new file mode 100644 index 000000000..4b62e7c3d Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t2/plots/-stat_eval-mse.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/config.yaml b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/config.yaml new file mode 100644 index 000000000..a8d613748 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/config.yaml @@ -0,0 +1,246 @@ +algo: ppo +algo_config: + activation: tanh + actor_lr: 0.001 + clip_obs: 10 + clip_param: 0.2 + clip_reward: 10 + critic_lr: 0.001 + deque_size: 10 + entropy_coef: 0.01 + eval_batch_size: 10 + eval_interval: 10000 + eval_save_best: true + filter_train_actions: true + gae_lambda: 0.95 + gamma: 0.99 + hidden_dim: 128 + log_interval: 10000 + max_env_steps: 250000 + max_grad_norm: 0.5 + mini_batch_size: 256 + norm_obs: false + norm_reward: false + num_checkpoints: 0 + num_workers: 1 + opt_epochs: 20 + penalize_sf_diff: true + rollout_batch_size: 1 + rollout_steps: 1000 + save_interval: 0 + sf_penalty: 1 + target_kl: 0.01 + tensorboard: false + training: true + use_clipped_value: false + use_gae: true + use_safe_reset: true +device: cpu +kv_overrides: +- sf_config.cost_function=one_step_cost +- algo_config.filter_train_actions=True +- algo_config.penalize_sf_diff=True +- algo_config.use_safe_reset=True +- algo_config.sf_penalty=1 +- task_config.use_constraint_penalty=False +output_dir: ./models/rl_models/ppo/mpsf_1_dm_t2 +overrides: +- ./config_overrides/crazyflie_track.yaml +- ./config_overrides/ppo_crazyflie.yaml +- ./config_overrides/nl_mpsc.yaml +restore: null +safety_filter: nl_mpsc +seed: null +sf_config: + cost_function: one_step_cost + decay_factor: 0.85 + horizon: 10 + integration_algo: rk4 + mpsc_cost_horizon: 5 + n_samples: 600 + prior_info: + prior_prop: null + prior_prop_rand_info: null + randomize_prior_prop: false + q_lin: + - 0.008 + - 1.85 + - 0.008 + - 1.85 + - 10 + - 10 + r_lin: + - 2 + use_acados: true + use_terminal_set: false + warmstart: true +tag: temp +task: quadrotor +task_config: + adversary_disturbance: null + adversary_disturbance_offset: 0.0 + adversary_disturbance_scale: 0.01 + camera_view: + - 5 + - -40 + - -40 + - 0.5 + - -1 + - 0.5 + constraint_penalty: -1 + constraints: + - active_dims: + - 0 + - 1 + - 2 + - 3 + - 6 + - 7 + constrained_variable: state + constraint_form: bounded_constraint + lower_bounds: + - -0.95 + - -2 + - -0.95 + - -2 + - -0.25 + - -0.25 + upper_bounds: + - 0.95 + - 2 + - 0.95 + - 2 + - 0.25 + - 0.25 + - constrained_variable: input + constraint_form: default_constraint + cost: quadratic + ctrl_freq: 500 + disturbances: + dynamics: + - disturbance_func: white_noise + std: 0.2 + observation: + - disturbance_func: white_noise + std: 0.002 + done_on_out_of_bound: true + done_on_violation: false + episode_len_sec: 15 + gui: false + inertial_prop: + Ixx: 1.4e-05 + Iyy: 1.4e-05 + Izz: 2.17e-05 + M: 0.0345 + inertial_prop_randomization_info: + Ixx: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Iyy: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Izz: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + M: + distrib: uniform + high: 0.0025 + low: -0.0025 + info_in_reset: true + init_state: + init_p: 0 + init_phi: 0 + init_psi: 0 + init_q: 0 + init_r: 0 + init_theta: 0 + init_x: 0 + init_x_dot: 0 + init_y: 0 + init_y_dot: 0 + init_z: 1 + init_z_dot: 0 + init_state_randomization_info: + init_p: + distrib: uniform + high: 0.5 + low: -0.5 + init_phi: + distrib: uniform + high: 0.25 + low: -0.25 + init_psi: + distrib: uniform + high: 0 + low: 0 + init_q: + distrib: uniform + high: 0.5 + low: -0.5 + init_r: + distrib: uniform + high: 0 + low: 0 + init_theta: + distrib: uniform + high: 0.25 + low: -0.25 + init_x: + distrib: uniform + high: 0.95 + low: -0.95 + init_x_dot: + distrib: uniform + high: 2 + low: -2 + init_y: + distrib: uniform + high: 0.95 + low: -0.95 + init_y_dot: + distrib: uniform + high: 2 + low: -2 + init_z: + distrib: uniform + high: 1 + low: 1 + init_z_dot: + distrib: uniform + high: 0 + low: 0 + norm_act_scale: 0.1 + normalized_rl_action_space: false + obs_goal_horizon: 0 + physics: pyb + pyb_freq: 1000 + quad_type: 3 + randomized_inertial_prop: true + randomized_init: false + rew_act_weight: 0.0001 + rew_exponential: true + rew_state_weight: 1.0 + seed: 1337 + task: traj_tracking + task_info: + num_cycles: 1 + proj_normal: + - 0 + - 1 + - 1 + proj_point: + - 0 + - 0 + - 0.5 + trajectory_plane: xz + trajectory_position_offset: + - 0 + - 1 + trajectory_scale: 1 + trajectory_type: figure8 + use_constraint_penalty: false + verbose: false +use_gpu: false diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/loss/approx_kl.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/loss/approx_kl.log new file mode 100644 index 000000000..40f15860e --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/loss/approx_kl.log @@ -0,0 +1,26 @@ +step,loss/approx_kl +10000,0.01526332534461593 +20000,0.028425859593941522 +30000,0.01349818598246202 +40000,0.015232445672154423 +50000,0.03288926016927386 +60000,0.012463888560887426 +70000,0.016686130493568877 +80000,0.02601548177578176 +90000,0.03234866767500837 +100000,0.03258593849216899 +110000,0.01652888854732737 +120000,0.018172321224119516 +130000,0.014048153367669633 +140000,0.02243826535607999 +150000,0.01027400043870633 +160000,0.012500690292411793 +170000,0.012228007601030792 +180000,0.023420352678901208 +190000,0.020117365694992864 +200000,0.024281097849598154 +210000,0.01696631511246475 +220000,0.011895126583597933 +230000,0.01476740313034194 +240000,0.011400877608684823 +250000,0.013153900085793185 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/loss/entropy_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/loss/entropy_loss.log new file mode 100644 index 000000000..1d357fdef --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/loss/entropy_loss.log @@ -0,0 +1,26 @@ +step,loss/entropy_loss +10000,-1.7947487274805707 +20000,-1.777595680952072 +30000,-1.8021287123362224 +40000,-1.7222305138905842 +50000,-1.6602578818798066 +60000,-1.6091089228789008 +70000,-1.5880245725313826 +80000,-1.610353672504425 +90000,-1.431754513581594 +100000,-1.3562142352263131 +110000,-1.325297458966573 +120000,-1.2676365613937377 +130000,-1.307635392745336 +140000,-1.288906999429067 +150000,-1.3385255674521128 +160000,-1.2267174502213796 +170000,-1.2753646194934845 +180000,-1.221980130672455 +190000,-1.2164108951886496 +200000,-1.1425705989201864 +210000,-1.1762908875942233 +220000,-1.161820614337921 +230000,-1.1160544057687123 +240000,-1.0642202417055766 +250000,-1.1047758281230928 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/loss/policy_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/loss/policy_loss.log new file mode 100644 index 000000000..ea1376d1d --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/loss/policy_loss.log @@ -0,0 +1,26 @@ +step,loss/policy_loss +10000,-0.006145778950052269 +20000,-0.006022050629533254 +30000,-0.01715641780451096 +40000,-0.011228479567520006 +50000,-0.004397403647650529 +60000,-0.007648646839383107 +70000,-0.0034482160195837366 +80000,0.0006605767975645844 +90000,-0.008424842492033327 +100000,-0.00422913430688546 +110000,-0.002336910588550245 +120000,-0.006470048289935264 +130000,-0.010837395886298758 +140000,-0.006283481808858117 +150000,-0.010119351283947921 +160000,-0.005851463961735993 +170000,-0.00319415898552045 +180000,-0.009209261866493703 +190000,-0.005371113594668571 +200000,0.00285567327638163 +210000,-0.004745967068834414 +220000,-0.009587878714518116 +230000,-0.0057630592431936415 +240000,-0.0034013772759969437 +250000,-0.009126185599195438 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/loss/value_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/loss/value_loss.log new file mode 100644 index 000000000..0030fb422 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/loss/value_loss.log @@ -0,0 +1,26 @@ +step,loss/value_loss +10000,3.3295819274637592 +20000,2.9720533485725347 +30000,2.7800920107136533 +40000,6.690234301623806 +50000,1.3403157442372575 +60000,1.349473308578786 +70000,0.7536916884716572 +80000,0.6288539775346723 +90000,1.7619254518621006 +100000,0.5465941108077403 +110000,0.20322942845834163 +120000,0.8346353526197559 +130000,0.165836879510944 +140000,0.26567469149529993 +150000,0.48905775049218614 +160000,1.4271310578393244 +170000,0.21582438633905499 +180000,1.5228396449613957 +190000,0.15903897447137644 +200000,0.1959508636511919 +210000,0.3106468581680363 +220000,7.304910211413774 +230000,0.9317342506688311 +240000,0.44357073830246324 +250000,2.0912698639378657 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat/ep_constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat/ep_constraint_violation.log new file mode 100644 index 000000000..8ac1440db --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat/ep_constraint_violation.log @@ -0,0 +1,26 @@ +step,stat/ep_constraint_violation +10000,166.5 +20000,251.5 +30000,284.0 +40000,239.5 +50000,6.5 +60000,1.5 +70000,10.0 +80000,1.5 +90000,17.0 +100000,5.0 +110000,4.5 +120000,5.5 +130000,12.5 +140000,17.5 +150000,11.5 +160000,18.0 +170000,2.5 +180000,15.5 +190000,12.0 +200000,11.5 +210000,14.5 +220000,118.0 +230000,19.5 +240000,28.0 +250000,26.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat/ep_length.log new file mode 100644 index 000000000..485c09e10 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat/ep_length.log @@ -0,0 +1,26 @@ +step,stat/ep_length +10000,375.0 +20000,375.0 +30000,375.0 +40000,375.0 +50000,375.0 +60000,375.0 +70000,375.0 +80000,375.0 +90000,375.0 +100000,375.0 +110000,375.0 +120000,375.0 +130000,375.0 +140000,375.0 +150000,375.0 +160000,375.0 +170000,375.0 +180000,375.0 +190000,375.0 +200000,375.0 +210000,375.0 +220000,375.0 +230000,375.0 +240000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat/ep_return.log new file mode 100644 index 000000000..586cda4b3 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat/ep_return.log @@ -0,0 +1,26 @@ +step,stat/ep_return +10000,34.23278063336491 +20000,38.498820761432754 +30000,40.58446885736932 +40000,35.10218800047487 +50000,90.307792109351 +60000,87.66469210904606 +70000,96.40948109479741 +80000,108.06406812458832 +90000,103.43800253417106 +100000,107.74066193727903 +110000,113.35387905566728 +120000,110.7600277608091 +130000,116.84675060665263 +140000,118.10507323515284 +150000,115.99784879649117 +160000,117.03837292485198 +170000,116.82610632400488 +180000,110.93741309835856 +190000,121.62938547855197 +200000,121.58946428101578 +210000,121.03695347761746 +220000,106.41590023602946 +230000,122.61018859181925 +240000,124.15945180851119 +250000,126.45361268211579 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat/ep_reward.log new file mode 100644 index 000000000..ad6ae178b --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat/ep_reward.log @@ -0,0 +1,26 @@ +step,stat/ep_reward +10000,0.09128741502230643 +20000,0.10266352203048734 +30000,0.1082252502863182 +40000,0.09360583466793297 +50000,0.2408207789582693 +60000,0.2337725122907895 +70000,0.25709194958612647 +80000,0.2881708483322355 +90000,0.2758346734244562 +100000,0.28730843183274407 +110000,0.3022770108151127 +120000,0.29536007402882425 +130000,0.3115913349510737 +140000,0.3149468619604076 +150000,0.30932759679064314 +160000,0.31210232779960534 +170000,0.3115362835306797 +180000,0.2958331015956228 +190000,0.3243450279428053 +200000,0.32423857141604207 +210000,0.3227652092736466 +220000,0.28377573396274525 +230000,0.326960502911518 +240000,0.33109187148936314 +250000,0.3372096338189754 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat_eval/constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat_eval/constraint_violation.log new file mode 100644 index 000000000..8aa9ccd89 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat_eval/constraint_violation.log @@ -0,0 +1,26 @@ +step,stat_eval/constraint_violation +10000,314.5 +20000,261.3 +30000,260.6 +40000,0.6 +50000,1.3 +60000,4.6 +70000,1.6 +80000,2.4 +90000,4.3 +100000,3.2 +110000,2.5 +120000,4.7 +130000,4.1 +140000,9.8 +150000,4.9 +160000,7.3 +170000,6.6 +180000,5.0 +190000,7.9 +200000,12.0 +210000,8.8 +220000,19.0 +230000,12.1 +240000,13.9 +250000,13.4 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat_eval/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat_eval/ep_length.log new file mode 100644 index 000000000..d1490483d --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat_eval/ep_length.log @@ -0,0 +1,26 @@ +step,stat_eval/ep_length +10000,375.0 +20000,375.0 +30000,375.0 +40000,375.0 +50000,375.0 +60000,375.0 +70000,375.0 +80000,375.0 +90000,375.0 +100000,375.0 +110000,375.0 +120000,375.0 +130000,375.0 +140000,375.0 +150000,375.0 +160000,375.0 +170000,375.0 +180000,375.0 +190000,375.0 +200000,375.0 +210000,375.0 +220000,375.0 +230000,375.0 +240000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat_eval/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat_eval/ep_return.log new file mode 100644 index 000000000..311b6b4ae --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat_eval/ep_return.log @@ -0,0 +1,26 @@ +step,stat_eval/ep_return +10000,23.86705961930538 +20000,49.44257931049036 +30000,46.264064417402636 +40000,77.91389319329164 +50000,87.19128111951173 +60000,99.30019886718807 +70000,105.98196799460179 +80000,103.4669164837572 +90000,113.4379032238979 +100000,113.42587875422915 +110000,113.5323369964602 +120000,117.72270264125059 +130000,118.39518891708617 +140000,117.246843234992 +150000,118.72003056142428 +160000,119.86158913509239 +170000,118.90391950074175 +180000,117.94301802196512 +190000,119.75233409369882 +200000,121.34264907693814 +210000,121.70270520919719 +220000,124.89842686123393 +230000,123.57683044463715 +240000,125.43260218428895 +250000,127.74352030674495 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat_eval/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat_eval/ep_reward.log new file mode 100644 index 000000000..d35f47d1d --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat_eval/ep_reward.log @@ -0,0 +1,26 @@ +step,stat_eval/ep_reward +10000,0.06364549231814767 +20000,0.1318468781613076 +30000,0.12337083844640706 +40000,0.20777038184877772 +50000,0.23251008298536466 +60000,0.26480053031250156 +70000,0.2826185813189381 +80000,0.2759117772900192 +90000,0.30250107526372777 +100000,0.3024690100112777 +110000,0.3027528986572272 +120000,0.3139272070433349 +130000,0.3157205037788965 +140000,0.3126582486266454 +150000,0.31658674816379806 +160000,0.3196309043602464 +170000,0.31707711866864463 +180000,0.3145147147252403 +190000,0.3193395575831969 +200000,0.3235803975385017 +210000,0.32454054722452585 +220000,0.33306247162995717 +230000,0.3295382145190324 +240000,0.33448693915810396 +250000,0.3406493874846531 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat_eval/mse.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat_eval/mse.log new file mode 100644 index 000000000..c97d5716f --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/logs/stat_eval/mse.log @@ -0,0 +1,26 @@ +step,stat_eval/mse +10000,4.467393725381707 +20000,2.8037340912961026 +30000,3.1910502728100854 +40000,1.289278253680667 +50000,1.1052550927693134 +60000,0.8219184578844476 +70000,0.7142248081784139 +80000,0.7238424708955454 +90000,0.6332907751260588 +100000,0.5609053631877245 +110000,0.5474059936007236 +120000,0.4425182203192464 +130000,0.4408330582805505 +140000,0.35060438637481944 +150000,0.3831864288970405 +160000,0.33513703520677407 +170000,0.37533129838567897 +180000,0.39608560049509695 +190000,0.35236254726813554 +200000,0.3479881818782021 +210000,0.35441219177794936 +220000,0.2774085607274273 +230000,0.3137786035156288 +240000,0.2529097839582506 +250000,0.2655757750780853 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/model_best.pt b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/model_best.pt new file mode 100644 index 000000000..d21e3a033 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/model_best.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/model_latest.pt b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/model_latest.pt new file mode 100644 index 000000000..1540df2b7 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/model_latest.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-loss-approx_kl.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-loss-approx_kl.jpg new file mode 100644 index 000000000..19599d0f4 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-loss-approx_kl.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-loss-entropy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-loss-entropy_loss.jpg new file mode 100644 index 000000000..fdc5baf6f Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-loss-policy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-loss-policy_loss.jpg new file mode 100644 index 000000000..ca7d1353b Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-loss-policy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-loss-value_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-loss-value_loss.jpg new file mode 100644 index 000000000..93de8dba1 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-loss-value_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat-ep_constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat-ep_constraint_violation.jpg new file mode 100644 index 000000000..bd9eeda21 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat-ep_length.jpg new file mode 100644 index 000000000..885959143 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat-ep_return.jpg new file mode 100644 index 000000000..ba79d9ec9 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat-ep_reward.jpg new file mode 100644 index 000000000..fa1b58902 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat_eval-constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat_eval-constraint_violation.jpg new file mode 100644 index 000000000..10409d190 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat_eval-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat_eval-ep_length.jpg new file mode 100644 index 000000000..f90cfa9c4 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat_eval-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat_eval-ep_return.jpg new file mode 100644 index 000000000..0d76de508 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat_eval-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat_eval-ep_reward.jpg new file mode 100644 index 000000000..89b544c1f Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat_eval-mse.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat_eval-mse.jpg new file mode 100644 index 000000000..ae2928cd4 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t2/plots/-stat_eval-mse.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/config.yaml b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/config.yaml new file mode 100644 index 000000000..1b45bc03a --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/config.yaml @@ -0,0 +1,246 @@ +algo: ppo +algo_config: + activation: tanh + actor_lr: 0.001 + clip_obs: 10 + clip_param: 0.2 + clip_reward: 10 + critic_lr: 0.001 + deque_size: 10 + entropy_coef: 0.01 + eval_batch_size: 10 + eval_interval: 10000 + eval_save_best: true + filter_train_actions: false + gae_lambda: 0.95 + gamma: 0.99 + hidden_dim: 128 + log_interval: 10000 + max_env_steps: 250000 + max_grad_norm: 0.5 + mini_batch_size: 256 + norm_obs: false + norm_reward: false + num_checkpoints: 0 + num_workers: 1 + opt_epochs: 20 + penalize_sf_diff: false + rollout_batch_size: 1 + rollout_steps: 1000 + save_interval: 0 + sf_penalty: false + target_kl: 0.01 + tensorboard: false + training: true + use_clipped_value: false + use_gae: true + use_safe_reset: false +device: cpu +kv_overrides: +- sf_config.cost_function=one_step_cost +- algo_config.filter_train_actions=False +- algo_config.penalize_sf_diff=False +- algo_config.use_safe_reset=False +- algo_config.sf_penalty=False +- task_config.use_constraint_penalty=True +output_dir: ./models/rl_models/ppo/none_cpen_dm_t2 +overrides: +- ./config_overrides/crazyflie_track.yaml +- ./config_overrides/ppo_crazyflie.yaml +- ./config_overrides/nl_mpsc.yaml +restore: null +safety_filter: nl_mpsc +seed: null +sf_config: + cost_function: one_step_cost + decay_factor: 0.85 + horizon: 10 + integration_algo: rk4 + mpsc_cost_horizon: 5 + n_samples: 600 + prior_info: + prior_prop: null + prior_prop_rand_info: null + randomize_prior_prop: false + q_lin: + - 0.008 + - 1.85 + - 0.008 + - 1.85 + - 10 + - 10 + r_lin: + - 2 + use_acados: true + use_terminal_set: false + warmstart: true +tag: temp +task: quadrotor +task_config: + adversary_disturbance: null + adversary_disturbance_offset: 0.0 + adversary_disturbance_scale: 0.01 + camera_view: + - 5 + - -40 + - -40 + - 0.5 + - -1 + - 0.5 + constraint_penalty: -1 + constraints: + - active_dims: + - 0 + - 1 + - 2 + - 3 + - 6 + - 7 + constrained_variable: state + constraint_form: bounded_constraint + lower_bounds: + - -0.95 + - -2 + - -0.95 + - -2 + - -0.25 + - -0.25 + upper_bounds: + - 0.95 + - 2 + - 0.95 + - 2 + - 0.25 + - 0.25 + - constrained_variable: input + constraint_form: default_constraint + cost: quadratic + ctrl_freq: 500 + disturbances: + dynamics: + - disturbance_func: white_noise + std: 0.2 + observation: + - disturbance_func: white_noise + std: 0.002 + done_on_out_of_bound: true + done_on_violation: false + episode_len_sec: 15 + gui: false + inertial_prop: + Ixx: 1.4e-05 + Iyy: 1.4e-05 + Izz: 2.17e-05 + M: 0.0345 + inertial_prop_randomization_info: + Ixx: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Iyy: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Izz: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + M: + distrib: uniform + high: 0.0025 + low: -0.0025 + info_in_reset: true + init_state: + init_p: 0 + init_phi: 0 + init_psi: 0 + init_q: 0 + init_r: 0 + init_theta: 0 + init_x: 0 + init_x_dot: 0 + init_y: 0 + init_y_dot: 0 + init_z: 1 + init_z_dot: 0 + init_state_randomization_info: + init_p: + distrib: uniform + high: 0.5 + low: -0.5 + init_phi: + distrib: uniform + high: 0.25 + low: -0.25 + init_psi: + distrib: uniform + high: 0 + low: 0 + init_q: + distrib: uniform + high: 0.5 + low: -0.5 + init_r: + distrib: uniform + high: 0 + low: 0 + init_theta: + distrib: uniform + high: 0.25 + low: -0.25 + init_x: + distrib: uniform + high: 0.95 + low: -0.95 + init_x_dot: + distrib: uniform + high: 2 + low: -2 + init_y: + distrib: uniform + high: 0.95 + low: -0.95 + init_y_dot: + distrib: uniform + high: 2 + low: -2 + init_z: + distrib: uniform + high: 1 + low: 1 + init_z_dot: + distrib: uniform + high: 0 + low: 0 + norm_act_scale: 0.1 + normalized_rl_action_space: false + obs_goal_horizon: 0 + physics: pyb + pyb_freq: 1000 + quad_type: 3 + randomized_inertial_prop: true + randomized_init: false + rew_act_weight: 0.0001 + rew_exponential: true + rew_state_weight: 1.0 + seed: 1337 + task: traj_tracking + task_info: + num_cycles: 1 + proj_normal: + - 0 + - 1 + - 1 + proj_point: + - 0 + - 0 + - 0.5 + trajectory_plane: xz + trajectory_position_offset: + - 0 + - 1 + trajectory_scale: 1 + trajectory_type: figure8 + use_constraint_penalty: true + verbose: false +use_gpu: false diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/loss/approx_kl.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/loss/approx_kl.log new file mode 100644 index 000000000..a3bdf4404 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/loss/approx_kl.log @@ -0,0 +1,26 @@ +step,loss/approx_kl +10000,0.024118476718043288 +20000,0.022943364002276208 +30000,0.046871219781072185 +40000,0.03383808438278113 +50000,0.014849015969472626 +60000,0.01878883936442435 +70000,0.014402925025206062 +80000,0.0018799341943425436 +90000,0.010454942906896272 +100000,0.03722052115481347 +110000,0.012339547214408716 +120000,0.015262041000338894 +130000,0.007210810125494999 +140000,0.01756978190969676 +150000,0.01620884680111582 +160000,0.019884430357099823 +170000,0.01779471222156038 +180000,0.015984614038219054 +190000,0.015752666195233668 +200000,0.025723958642144374 +210000,0.024227396781013037 +220000,0.028652821698536474 +230000,0.005987323638206968 +240000,0.029156767256790773 +250000,0.01568275892253344 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/loss/entropy_loss.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/loss/entropy_loss.log new file mode 100644 index 000000000..4fb87c0db --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/loss/entropy_loss.log @@ -0,0 +1,26 @@ +step,loss/entropy_loss +10000,-1.8083121657371524 +20000,-1.8019202252229054 +30000,-1.7983015775680542 +40000,-1.7688773989677429 +50000,-1.7899225334326423 +60000,-1.6923580507437386 +70000,-1.651252418756485 +80000,-1.5844629724820454 +90000,-1.5801876326402027 +100000,-1.5703354497750601 +110000,-1.51395267645518 +120000,-1.5356650431950887 +130000,-1.4505620896816251 +140000,-1.3731438775857288 +150000,-1.3931834677855175 +160000,-1.3451145450274151 +170000,-1.2590734322865802 +180000,-1.2559285958607993 +190000,-1.2226751705010732 +200000,-1.2602473676204684 +210000,-1.2699571351210275 +220000,-1.186603393157323 +230000,-1.1642865955829622 +240000,-1.1853824257850647 +250000,-1.1689524551232657 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/loss/policy_loss.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/loss/policy_loss.log new file mode 100644 index 000000000..f196c9328 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/loss/policy_loss.log @@ -0,0 +1,26 @@ +step,loss/policy_loss +10000,-0.003949545029501169 +20000,-0.010205737328006182 +30000,-0.01252658621860192 +40000,-0.007245917519289366 +50000,-0.0064513154050621175 +60000,-0.01277390382046759 +70000,9.18918801837941e-05 +80000,-0.008157631586765145 +90000,-0.008978439567693768 +100000,0.014007530675031885 +110000,-0.0028481431483454943 +120000,-0.006362883163883023 +130000,-0.012980559340279202 +140000,0.0028521419085793865 +150000,-0.008032788572331652 +160000,0.0022755761076083876 +170000,-0.005619976556420303 +180000,-0.0012017784252041338 +190000,-0.0035053851185561044 +200000,-0.003420596123477012 +210000,-0.008004857737914674 +220000,-0.006815696616989425 +230000,-0.0003151675992295319 +240000,0.0007000282358280777 +250000,0.004144850409476371 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/loss/value_loss.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/loss/value_loss.log new file mode 100644 index 000000000..c0252bf7d --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/loss/value_loss.log @@ -0,0 +1,26 @@ +step,loss/value_loss +10000,1.8227588424912338 +20000,1.3742972713604724 +30000,2.256639325431487 +40000,0.5371642576097851 +50000,0.22073705793004703 +60000,0.7335893079770874 +70000,0.7589696239863137 +80000,0.9395151217270872 +90000,0.22166181085519257 +100000,0.6046337555161221 +110000,0.28734236407024827 +120000,0.1902842387049592 +130000,0.3787289010338316 +140000,0.5621060401039331 +150000,0.28648349299715087 +160000,0.13889241229471522 +170000,0.16872833719179708 +180000,0.8407712749103725 +190000,0.5933347672817912 +200000,0.12567094490180347 +210000,0.5330014987909848 +220000,0.207061152902713 +230000,0.2805861956503439 +240000,0.443980952348009 +250000,0.5939950258565319 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat/ep_constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat/ep_constraint_violation.log new file mode 100644 index 000000000..baa15345d --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat/ep_constraint_violation.log @@ -0,0 +1,26 @@ +step,stat/ep_constraint_violation +10000,32.90909090909091 +20000,44.55555555555556 +30000,52.25 +40000,63.2 +50000,22.2 +60000,148.5 +70000,64.5 +80000,34.0 +90000,1.5 +100000,0.0 +110000,6.5 +120000,0.0 +130000,3.5 +140000,0.5 +150000,8.5 +160000,0.5 +170000,1.5 +180000,24.0 +190000,15.5 +200000,2.5 +210000,12.0 +220000,6.0 +230000,5.5 +240000,16.5 +250000,1.5 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat/ep_length.log new file mode 100644 index 000000000..606572e49 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat/ep_length.log @@ -0,0 +1,26 @@ +step,stat/ep_length +10000,83.36363636363636 +20000,107.44444444444444 +30000,119.0 +40000,169.4 +50000,176.4 +60000,375.0 +70000,375.0 +80000,375.0 +90000,375.0 +100000,375.0 +110000,375.0 +120000,375.0 +130000,375.0 +140000,375.0 +150000,375.0 +160000,375.0 +170000,375.0 +180000,375.0 +190000,375.0 +200000,375.0 +210000,375.0 +220000,375.0 +230000,375.0 +240000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat/ep_return.log new file mode 100644 index 000000000..bc309eaa4 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat/ep_return.log @@ -0,0 +1,26 @@ +step,stat/ep_return +10000,20.59354495891806 +20000,24.199217149166717 +30000,29.958410470952305 +40000,43.2728607294667 +50000,45.18172988330468 +60000,51.11921962480507 +70000,69.39772166745726 +80000,64.4522362649387 +90000,78.12012203480478 +100000,83.29908797880893 +110000,84.36698693955537 +120000,94.09170317941988 +130000,97.49358823805045 +140000,98.70492310170823 +150000,102.05461466895599 +160000,109.08931706644395 +170000,110.15625231681835 +180000,102.98646282468553 +190000,108.14073394740073 +200000,110.71593510456573 +210000,109.56672740881675 +220000,109.96111520337809 +230000,111.96541598478403 +240000,105.78881897603425 +250000,111.798574030791 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat/ep_reward.log new file mode 100644 index 000000000..4c4dad933 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat/ep_reward.log @@ -0,0 +1,26 @@ +step,stat/ep_reward +10000,0.25288489203321807 +20000,0.2228898269722238 +30000,0.2514916843994165 +40000,0.25543610337465444 +50000,0.25609263955989037 +60000,0.1363179189994802 +70000,0.18506059111321935 +80000,0.17187263003983655 +90000,0.2083203254261461 +100000,0.22213090127682383 +110000,0.22497863183881434 +120000,0.250911208478453 +130000,0.2599829019681345 +140000,0.26321312827122195 +150000,0.272145639117216 +160000,0.2909048455105172 +170000,0.29375000617818225 +180000,0.27463056753249476 +190000,0.288375290526402 +200000,0.2952424936121753 +210000,0.29217793975684464 +220000,0.2932296405423416 +230000,0.29857444262609073 +240000,0.2821035172694246 +250000,0.298129530748776 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat_eval/constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat_eval/constraint_violation.log new file mode 100644 index 000000000..affa95501 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat_eval/constraint_violation.log @@ -0,0 +1,26 @@ +step,stat_eval/constraint_violation +10000,144.2 +20000,100.3 +30000,111.4 +40000,139.9 +50000,101.8 +60000,195.5 +70000,119.0 +80000,15.0 +90000,0.3 +100000,0.3 +110000,0.1 +120000,3.3 +130000,0.2 +140000,0.3 +150000,1.3 +160000,0.2 +170000,0.1 +180000,1.1 +190000,1.3 +200000,0.6 +210000,1.0 +220000,1.0 +230000,1.6 +240000,2.0 +250000,1.8 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat_eval/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat_eval/ep_length.log new file mode 100644 index 000000000..d1490483d --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat_eval/ep_length.log @@ -0,0 +1,26 @@ +step,stat_eval/ep_length +10000,375.0 +20000,375.0 +30000,375.0 +40000,375.0 +50000,375.0 +60000,375.0 +70000,375.0 +80000,375.0 +90000,375.0 +100000,375.0 +110000,375.0 +120000,375.0 +130000,375.0 +140000,375.0 +150000,375.0 +160000,375.0 +170000,375.0 +180000,375.0 +190000,375.0 +200000,375.0 +210000,375.0 +220000,375.0 +230000,375.0 +240000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat_eval/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat_eval/ep_return.log new file mode 100644 index 000000000..62b4baf1d --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat_eval/ep_return.log @@ -0,0 +1,26 @@ +step,stat_eval/ep_return +10000,32.21835923788923 +20000,41.931306302464634 +30000,56.96663627362187 +40000,55.02043714595918 +50000,58.445225710688725 +60000,51.91453373150441 +70000,57.06851367143632 +80000,74.50299726342242 +90000,80.63988590190242 +100000,83.42664742071948 +110000,88.26582207163939 +120000,95.03271287755774 +130000,101.53709859826856 +140000,102.98635400477727 +150000,107.41071994085982 +160000,109.85127689760505 +170000,110.78321456289082 +180000,111.70872772978346 +190000,113.24140610293234 +200000,114.05274140333752 +210000,114.71364516012522 +220000,114.53969865613801 +230000,114.97230380163823 +240000,114.44755132767614 +250000,115.3431328988521 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat_eval/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat_eval/ep_reward.log new file mode 100644 index 000000000..3c69dc7f2 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat_eval/ep_reward.log @@ -0,0 +1,26 @@ +step,stat_eval/ep_reward +10000,0.08591562463437129 +20000,0.11181681680657236 +30000,0.15191103006299167 +40000,0.14672116572255783 +50000,0.15585393522850327 +60000,0.13843875661734512 +70000,0.15218270312383023 +80000,0.19867465936912648 +90000,0.21503969573840648 +100000,0.22247105978858528 +110000,0.2353755255243717 +120000,0.25342056767348736 +130000,0.27076559626204955 +140000,0.27463027734607276 +150000,0.2864285865089595 +160000,0.2929367383936134 +170000,0.29542190550104214 +180000,0.2978899406127559 +190000,0.3019770829411529 +200000,0.3041406437422333 +210000,0.3059030537603339 +220000,0.30543919641636796 +230000,0.30659281013770195 +240000,0.3051934702071364 +250000,0.3075816877302722 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat_eval/mse.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat_eval/mse.log new file mode 100644 index 000000000..b79761eef --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/logs/stat_eval/mse.log @@ -0,0 +1,26 @@ +step,stat_eval/mse +10000,4.854668295731338 +20000,3.615589418367975 +30000,5.126461855768948 +40000,4.86662973568113 +50000,4.454115549136346 +60000,4.419954669265197 +70000,3.4690938385745738 +80000,2.2736259842691124 +90000,1.9379115402632743 +100000,1.7955369301362123 +110000,1.4292870395551391 +120000,1.1633308994533802 +130000,1.2636663407998807 +140000,1.0679620017048959 +150000,1.0046207841898975 +160000,0.9379385165372023 +170000,0.7755680978318659 +180000,0.7387680268476051 +190000,0.726322925258416 +200000,0.6637891558848958 +210000,0.6737777212010451 +220000,0.7477002745248209 +230000,0.758120481873713 +240000,0.6991795253832537 +250000,0.8428515568148214 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/model_best.pt b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/model_best.pt new file mode 100644 index 000000000..bb12b6b17 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/model_best.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/model_latest.pt b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/model_latest.pt new file mode 100644 index 000000000..896e1096c Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/model_latest.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-loss-approx_kl.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-loss-approx_kl.jpg new file mode 100644 index 000000000..b344f7096 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-loss-approx_kl.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-loss-entropy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-loss-entropy_loss.jpg new file mode 100644 index 000000000..b55efab13 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-loss-policy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-loss-policy_loss.jpg new file mode 100644 index 000000000..2401e687d Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-loss-policy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-loss-value_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-loss-value_loss.jpg new file mode 100644 index 000000000..5ee162da4 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-loss-value_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat-ep_constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat-ep_constraint_violation.jpg new file mode 100644 index 000000000..dbfea274c Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat-ep_length.jpg new file mode 100644 index 000000000..e8ba20746 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat-ep_return.jpg new file mode 100644 index 000000000..526dfea18 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat-ep_reward.jpg new file mode 100644 index 000000000..41ec92559 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat_eval-constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat_eval-constraint_violation.jpg new file mode 100644 index 000000000..d1c195c16 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat_eval-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat_eval-ep_length.jpg new file mode 100644 index 000000000..f90cfa9c4 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat_eval-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat_eval-ep_return.jpg new file mode 100644 index 000000000..453027515 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat_eval-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat_eval-ep_reward.jpg new file mode 100644 index 000000000..8812ec08d Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat_eval-mse.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat_eval-mse.jpg new file mode 100644 index 000000000..a427c52cd Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t2/plots/-stat_eval-mse.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/config.yaml b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/config.yaml new file mode 100644 index 000000000..0abee7b8f --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/config.yaml @@ -0,0 +1,246 @@ +algo: ppo +algo_config: + activation: tanh + actor_lr: 0.001 + clip_obs: 10 + clip_param: 0.2 + clip_reward: 10 + critic_lr: 0.001 + deque_size: 10 + entropy_coef: 0.01 + eval_batch_size: 10 + eval_interval: 10000 + eval_save_best: true + filter_train_actions: false + gae_lambda: 0.95 + gamma: 0.99 + hidden_dim: 128 + log_interval: 10000 + max_env_steps: 250000 + max_grad_norm: 0.5 + mini_batch_size: 256 + norm_obs: false + norm_reward: false + num_checkpoints: 0 + num_workers: 1 + opt_epochs: 20 + penalize_sf_diff: false + rollout_batch_size: 1 + rollout_steps: 1000 + save_interval: 0 + sf_penalty: false + target_kl: 0.01 + tensorboard: false + training: true + use_clipped_value: false + use_gae: true + use_safe_reset: false +device: cpu +kv_overrides: +- sf_config.cost_function=one_step_cost +- algo_config.filter_train_actions=False +- algo_config.penalize_sf_diff=False +- algo_config.use_safe_reset=False +- algo_config.sf_penalty=False +- task_config.use_constraint_penalty=False +output_dir: ./models/rl_models/ppo/none_dm_t2 +overrides: +- ./config_overrides/crazyflie_track.yaml +- ./config_overrides/ppo_crazyflie.yaml +- ./config_overrides/nl_mpsc.yaml +restore: null +safety_filter: nl_mpsc +seed: null +sf_config: + cost_function: one_step_cost + decay_factor: 0.85 + horizon: 10 + integration_algo: rk4 + mpsc_cost_horizon: 5 + n_samples: 600 + prior_info: + prior_prop: null + prior_prop_rand_info: null + randomize_prior_prop: false + q_lin: + - 0.008 + - 1.85 + - 0.008 + - 1.85 + - 10 + - 10 + r_lin: + - 2 + use_acados: true + use_terminal_set: false + warmstart: true +tag: temp +task: quadrotor +task_config: + adversary_disturbance: null + adversary_disturbance_offset: 0.0 + adversary_disturbance_scale: 0.01 + camera_view: + - 5 + - -40 + - -40 + - 0.5 + - -1 + - 0.5 + constraint_penalty: -1 + constraints: + - active_dims: + - 0 + - 1 + - 2 + - 3 + - 6 + - 7 + constrained_variable: state + constraint_form: bounded_constraint + lower_bounds: + - -0.95 + - -2 + - -0.95 + - -2 + - -0.25 + - -0.25 + upper_bounds: + - 0.95 + - 2 + - 0.95 + - 2 + - 0.25 + - 0.25 + - constrained_variable: input + constraint_form: default_constraint + cost: quadratic + ctrl_freq: 500 + disturbances: + dynamics: + - disturbance_func: white_noise + std: 0.2 + observation: + - disturbance_func: white_noise + std: 0.002 + done_on_out_of_bound: true + done_on_violation: false + episode_len_sec: 15 + gui: false + inertial_prop: + Ixx: 1.4e-05 + Iyy: 1.4e-05 + Izz: 2.17e-05 + M: 0.0345 + inertial_prop_randomization_info: + Ixx: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Iyy: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Izz: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + M: + distrib: uniform + high: 0.0025 + low: -0.0025 + info_in_reset: true + init_state: + init_p: 0 + init_phi: 0 + init_psi: 0 + init_q: 0 + init_r: 0 + init_theta: 0 + init_x: 0 + init_x_dot: 0 + init_y: 0 + init_y_dot: 0 + init_z: 1 + init_z_dot: 0 + init_state_randomization_info: + init_p: + distrib: uniform + high: 0.5 + low: -0.5 + init_phi: + distrib: uniform + high: 0.25 + low: -0.25 + init_psi: + distrib: uniform + high: 0 + low: 0 + init_q: + distrib: uniform + high: 0.5 + low: -0.5 + init_r: + distrib: uniform + high: 0 + low: 0 + init_theta: + distrib: uniform + high: 0.25 + low: -0.25 + init_x: + distrib: uniform + high: 0.95 + low: -0.95 + init_x_dot: + distrib: uniform + high: 2 + low: -2 + init_y: + distrib: uniform + high: 0.95 + low: -0.95 + init_y_dot: + distrib: uniform + high: 2 + low: -2 + init_z: + distrib: uniform + high: 1 + low: 1 + init_z_dot: + distrib: uniform + high: 0 + low: 0 + norm_act_scale: 0.1 + normalized_rl_action_space: false + obs_goal_horizon: 0 + physics: pyb + pyb_freq: 1000 + quad_type: 3 + randomized_inertial_prop: true + randomized_init: false + rew_act_weight: 0.0001 + rew_exponential: true + rew_state_weight: 1.0 + seed: 1337 + task: traj_tracking + task_info: + num_cycles: 1 + proj_normal: + - 0 + - 1 + - 1 + proj_point: + - 0 + - 0 + - 0.5 + trajectory_plane: xz + trajectory_position_offset: + - 0 + - 1 + trajectory_scale: 1 + trajectory_type: figure8 + use_constraint_penalty: false + verbose: false +use_gpu: false diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/loss/approx_kl.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/loss/approx_kl.log new file mode 100644 index 000000000..23866df19 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/loss/approx_kl.log @@ -0,0 +1,26 @@ +step,loss/approx_kl +10000,0.029328657209407537 +20000,0.022014579367047794 +30000,0.01649409615977978 +40000,0.023039809455319 +50000,0.019315944286063313 +60000,0.029233624894792837 +70000,0.020879776562408858 +80000,0.019471472168030836 +90000,0.024843971834828456 +100000,0.032070744344188516 +110000,0.016293461519914364 +120000,0.018227850876670955 +130000,0.01594270186227125 +140000,0.04335399777046405 +150000,0.02821884999478546 +160000,0.022189799156816056 +170000,0.02760256033895227 +180000,0.02008047339040786 +190000,0.016292289029418806 +200000,0.015524561549924935 +210000,0.010626850987318902 +220000,0.029338243223416317 +230000,0.020133427303517238 +240000,0.018616934852131332 +250000,0.013627865821278343 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/loss/entropy_loss.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/loss/entropy_loss.log new file mode 100644 index 000000000..4ac4ea7be --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/loss/entropy_loss.log @@ -0,0 +1,26 @@ +step,loss/entropy_loss +10000,-1.8121712346871697 +20000,-1.8394321143627166 +30000,-1.7653158684571586 +40000,-1.6305527528127033 +50000,-1.6065607567628226 +60000,-1.573303320010503 +70000,-1.5133593102296194 +80000,-1.3561521947383883 +90000,-1.27740984360377 +100000,-1.3389735599358876 +110000,-1.3196870227654776 +120000,-1.3357127288977304 +130000,-1.327176715930303 +140000,-1.2900292098522186 +150000,-1.2043556491533915 +160000,-1.214868011077245 +170000,-1.2166355510552724 +180000,-1.1715630412101747 +190000,-1.1486826618512471 +200000,-1.087653011083603 +210000,-1.0348510543505354 +220000,-1.0052286624908446 +230000,-0.9155511607726415 +240000,-0.9324680934349695 +250000,-0.8860280066728594 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/loss/policy_loss.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/loss/policy_loss.log new file mode 100644 index 000000000..aa905b55e --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/loss/policy_loss.log @@ -0,0 +1,26 @@ +step,loss/policy_loss +10000,-0.00459844770979481 +20000,-0.007897369453223137 +30000,-0.012269518425331884 +40000,-0.0011960952845730458 +50000,-0.007210939384708089 +60000,0.0111943536867532 +70000,-0.0030753914615561234 +80000,-0.009783869369201184 +90000,-0.0010908700082097752 +100000,0.003576407948216605 +110000,-0.004812662765089295 +120000,0.009363959055022424 +130000,-0.011981510507628069 +140000,-0.0018294926700080608 +150000,0.00864399956720097 +160000,-0.0007979744977540532 +170000,0.005772694409997086 +180000,-0.006155042551306949 +190000,0.007446863907026405 +200000,-0.014025644835841428 +210000,-0.008819912212089274 +220000,0.008839603623527112 +230000,-7.285821059305726e-05 +240000,0.007738688469453471 +250000,-0.008471450505474947 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/loss/value_loss.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/loss/value_loss.log new file mode 100644 index 000000000..fc75657a1 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/loss/value_loss.log @@ -0,0 +1,26 @@ +step,loss/value_loss +10000,3.5356466603663925 +20000,1.4726442899853602 +30000,1.160891551392388 +40000,0.32109336721203724 +50000,0.46157173536545226 +60000,1.2407517290661425 +70000,0.33419645855414204 +80000,0.6934745379899542 +90000,0.23066233244103146 +100000,0.3344049147571314 +110000,0.11251440940033049 +120000,0.28343842149101184 +130000,0.26070299044015066 +140000,0.2914450995695175 +150000,0.2731727212381372 +160000,0.3140986622974732 +170000,0.5888623587742432 +180000,0.17552172174858596 +190000,0.1649269506232135 +200000,0.13370848883139816 +210000,0.20359164520257983 +220000,0.1952828270885816 +230000,0.18408600913100254 +240000,0.24488092798993738 +250000,0.20760822301010756 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat/ep_constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat/ep_constraint_violation.log new file mode 100644 index 000000000..9fb35f627 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat/ep_constraint_violation.log @@ -0,0 +1,26 @@ +step,stat/ep_constraint_violation +10000,39.888888888888886 +20000,79.66666666666667 +30000,42.0 +40000,27.2 +50000,26.4 +60000,102.5 +70000,70.0 +80000,31.0 +90000,12.5 +100000,8.5 +110000,6.5 +120000,14.5 +130000,5.5 +140000,5.5 +150000,48.5 +160000,7.5 +170000,30.0 +180000,6.5 +190000,24.5 +200000,24.5 +210000,20.0 +220000,18.0 +230000,52.5 +240000,58.5 +250000,58.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat/ep_length.log new file mode 100644 index 000000000..8f9ce8d16 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat/ep_length.log @@ -0,0 +1,26 @@ +step,stat/ep_length +10000,103.0 +20000,138.5 +30000,159.66666666666666 +40000,167.6 +50000,176.0 +60000,375.0 +70000,375.0 +80000,375.0 +90000,375.0 +100000,375.0 +110000,375.0 +120000,375.0 +130000,375.0 +140000,375.0 +150000,375.0 +160000,375.0 +170000,375.0 +180000,375.0 +190000,375.0 +200000,375.0 +210000,375.0 +220000,375.0 +230000,375.0 +240000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat/ep_return.log new file mode 100644 index 000000000..3c60ea4e0 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat/ep_return.log @@ -0,0 +1,26 @@ +step,stat/ep_return +10000,22.220671997275193 +20000,28.496343677977404 +30000,34.461195494489395 +40000,44.17225937088596 +50000,46.42473639954539 +60000,75.59053421372309 +70000,90.18110216873583 +80000,102.94597156336778 +90000,105.49616053238145 +100000,108.61043304518543 +110000,109.94107528962479 +120000,113.2246055844304 +130000,112.25243482968139 +140000,108.50685830721856 +150000,108.26280332669606 +160000,115.57611605395718 +170000,112.28314273427527 +180000,113.22421202310207 +190000,114.57502708846195 +200000,111.14109621775705 +210000,112.5809194454992 +220000,114.38566134151016 +230000,114.25216700458893 +240000,112.84749175240256 +250000,112.1101683619173 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat/ep_reward.log new file mode 100644 index 000000000..9ff88eed9 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat/ep_reward.log @@ -0,0 +1,26 @@ +step,stat/ep_reward +10000,0.22479046341984896 +20000,0.20880302035300255 +30000,0.21571904982051116 +40000,0.2635169263769452 +50000,0.26376088119822516 +60000,0.20157475790326157 +70000,0.24048293911662888 +80000,0.27452259083564745 +90000,0.2813230947530172 +100000,0.2896278214538278 +110000,0.29317620077233275 +120000,0.30193228155848106 +130000,0.2993398262124837 +140000,0.2893516221525828 +150000,0.2887008088711895 +160000,0.3082029761438858 +170000,0.2994217139580674 +180000,0.30193123206160555 +190000,0.30553340556923186 +200000,0.29637625658068545 +210000,0.30021578518799785 +220000,0.3050284302440271 +230000,0.3046724453455705 +240000,0.3009266446730735 +250000,0.29896044896511276 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat_eval/constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat_eval/constraint_violation.log new file mode 100644 index 000000000..a44e86962 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat_eval/constraint_violation.log @@ -0,0 +1,26 @@ +step,stat_eval/constraint_violation +10000,409.6 +20000,351.1 +30000,259.5 +40000,296.4 +50000,211.0 +60000,162.8 +70000,136.3 +80000,3.0 +90000,64.7 +100000,3.6 +110000,5.4 +120000,4.1 +130000,3.9 +140000,69.8 +150000,15.9 +160000,5.2 +170000,5.7 +180000,2.7 +190000,4.3 +200000,3.0 +210000,3.9 +220000,8.8 +230000,9.5 +240000,13.9 +250000,10.5 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat_eval/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat_eval/ep_length.log new file mode 100644 index 000000000..860400ac6 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat_eval/ep_length.log @@ -0,0 +1,26 @@ +step,stat_eval/ep_length +10000,375.0 +20000,359.7 +30000,375.0 +40000,375.0 +50000,375.0 +60000,375.0 +70000,375.0 +80000,375.0 +90000,375.0 +100000,375.0 +110000,375.0 +120000,375.0 +130000,375.0 +140000,375.0 +150000,375.0 +160000,375.0 +170000,375.0 +180000,375.0 +190000,375.0 +200000,375.0 +210000,375.0 +220000,375.0 +230000,375.0 +240000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat_eval/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat_eval/ep_return.log new file mode 100644 index 000000000..07fe715f4 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat_eval/ep_return.log @@ -0,0 +1,26 @@ +step,stat_eval/ep_return +10000,28.40691068946335 +20000,24.493756064961484 +30000,45.883714786303095 +40000,48.46229311505614 +50000,53.276378476907595 +60000,72.2873671827708 +70000,80.48788389627492 +80000,107.20282157799222 +90000,99.44820133142211 +100000,111.03535879899127 +110000,115.42709678399119 +120000,114.00453381919726 +130000,114.07536573452072 +140000,100.04509621689013 +150000,111.57377884352732 +160000,117.5323179522856 +170000,119.47144218121359 +180000,112.94741784985945 +190000,116.23148358728739 +200000,112.88174127516223 +210000,115.92785646306102 +220000,113.42585723260122 +230000,113.81206212809926 +240000,116.76623748691046 +250000,118.25209779268846 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat_eval/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat_eval/ep_reward.log new file mode 100644 index 000000000..c0a4ca509 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat_eval/ep_reward.log @@ -0,0 +1,26 @@ +step,stat_eval/ep_reward +10000,0.07575176183856894 +20000,0.06980787950607258 +30000,0.12235657276347492 +40000,0.12923278164014967 +50000,0.14207034260508694 +60000,0.19276631248738876 +70000,0.21463435705673314 +80000,0.28587419087464594 +90000,0.265195203550459 +100000,0.2960942901306434 +110000,0.30780559142397657 +120000,0.304012090184526 +130000,0.30420097529205525 +140000,0.26678692324504033 +150000,0.2975300769160728 +160000,0.3134195145394283 +170000,0.3185905124832362 +180000,0.3011931142662918 +190000,0.30995062289943304 +200000,0.3010179767337659 +210000,0.3091409505681627 +220000,0.3024689526202699 +230000,0.303498832341598 +240000,0.31137663329842785 +250000,0.3153389274471693 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat_eval/mse.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat_eval/mse.log new file mode 100644 index 000000000..4ada80645 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/logs/stat_eval/mse.log @@ -0,0 +1,26 @@ +step,stat_eval/mse +10000,7.150746802748363 +20000,5.350706788467387 +30000,4.676288707503403 +40000,5.041078840122521 +50000,4.556592775517363 +60000,3.852493963587789 +70000,2.7039304938121136 +80000,1.004503514901741 +90000,1.6032942992256847 +100000,0.8198212337556281 +110000,0.7363486543793981 +120000,0.8169451082444541 +130000,0.8608876516367253 +140000,1.4010965530650605 +150000,0.8433654927185735 +160000,0.7241059854218577 +170000,0.7231438064174354 +180000,0.6737853810625526 +190000,0.6663754833580455 +200000,0.7793087275966786 +210000,0.7396161760964703 +220000,0.7302736007260012 +230000,0.6932550738655163 +240000,0.6920019343148439 +250000,0.627575576238759 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/model_best.pt b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/model_best.pt new file mode 100644 index 000000000..3aa412e78 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/model_best.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/model_latest.pt b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/model_latest.pt new file mode 100644 index 000000000..a334166b4 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/model_latest.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-loss-approx_kl.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-loss-approx_kl.jpg new file mode 100644 index 000000000..11a5880b2 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-loss-approx_kl.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-loss-entropy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-loss-entropy_loss.jpg new file mode 100644 index 000000000..6b3f328b1 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-loss-policy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-loss-policy_loss.jpg new file mode 100644 index 000000000..89f37e60c Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-loss-policy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-loss-value_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-loss-value_loss.jpg new file mode 100644 index 000000000..ae0e08e48 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-loss-value_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat-ep_constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat-ep_constraint_violation.jpg new file mode 100644 index 000000000..7d74afcf5 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat-ep_length.jpg new file mode 100644 index 000000000..3a61dd541 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat-ep_return.jpg new file mode 100644 index 000000000..e8bbc90ff Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat-ep_reward.jpg new file mode 100644 index 000000000..6b2384c76 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat_eval-constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat_eval-constraint_violation.jpg new file mode 100644 index 000000000..4e83dc778 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat_eval-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat_eval-ep_length.jpg new file mode 100644 index 000000000..e050eaa27 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat_eval-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat_eval-ep_return.jpg new file mode 100644 index 000000000..f5cdb583c Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat_eval-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat_eval-ep_reward.jpg new file mode 100644 index 000000000..d746a7e30 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat_eval-mse.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat_eval-mse.jpg new file mode 100644 index 000000000..15f1a2a39 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t2/plots/-stat_eval-mse.jpg differ diff --git a/experiments/crazyflie/plotting_results.py b/experiments/crazyflie/plotting_results.py index 8bf685102..e2bf03426 100644 --- a/experiments/crazyflie/plotting_results.py +++ b/experiments/crazyflie/plotting_results.py @@ -20,7 +20,7 @@ plot = False save_figs = True -suffix = '_dm3' +suffix = '_dm_t2' # suffix = '' ordered_models = [f'mpsf_0.1{suffix}', f'mpsf_1{suffix}', f'mpsf_10{suffix}', f'none{suffix}', f'none_cpen{suffix}'] @@ -295,7 +295,7 @@ def create_paper_plot(data_extractor): fig.savefig(f'./results_cf/{algo_name}/graphs/real/{image_suffix}.png', dpi=300) else: if suffix != '': - fig.savefig(f'./results_cf/{algo_name}/graphs/{suffix.replace("_", "")}/{image_suffix}.png', dpi=300) + fig.savefig(f'./results_cf/{algo_name}/graphs/{suffix[1:]}/{image_suffix}.png', dpi=300) else: fig.savefig(f'./results_cf/{algo_name}/graphs/{image_suffix}.png', dpi=300) # tikzplotlib.save(f'./all_trajs/{image_suffix}.tex', axis_height='2.2in', axis_width='3.5in') @@ -360,7 +360,7 @@ def plot_log(algo, key, all_results): if save_figs: image_suffix = key.replace('/', '__') if suffix != '': - fig.savefig(f'./results_cf/{algo}/graphs/{suffix.replace("_", "")}/{image_suffix}.png', dpi=300) + fig.savefig(f'./results_cf/{algo}/graphs/{suffix[1:]}/{image_suffix}.png', dpi=300) else: fig.savefig(f'./results_cf/{algo}/graphs/{image_suffix}.png', dpi=300) plt.close() diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/constraint_violations.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/constraint_violations.png new file mode 100644 index 000000000..e3a4c2634 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/constraint_violations.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/feasible_iterations.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/feasible_iterations.png new file mode 100644 index 000000000..37e1411e0 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/feasible_iterations.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/length.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/length.png new file mode 100644 index 000000000..1070ecd78 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/length.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/loss__approx_kl.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/loss__approx_kl.png new file mode 100644 index 000000000..1b4bddbab Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/loss__approx_kl.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/loss__entropy_loss.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/loss__entropy_loss.png new file mode 100644 index 000000000..231263ae5 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/loss__entropy_loss.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/loss__policy_loss.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/loss__policy_loss.png new file mode 100644 index 000000000..b44284e92 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/loss__policy_loss.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/loss__value_loss.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/loss__value_loss.png new file mode 100644 index 000000000..8d376097e Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/loss__value_loss.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/magnitude_of_corrections.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/magnitude_of_corrections.png new file mode 100644 index 000000000..2a81f46e2 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/magnitude_of_corrections.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/max_correction.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/max_correction.png new file mode 100644 index 000000000..e42c34343 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/max_correction.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/number_of_corrections.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/number_of_corrections.png new file mode 100644 index 000000000..bcba2403e Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/number_of_corrections.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/percent_magnitude_of_corrections.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/percent_magnitude_of_corrections.png new file mode 100644 index 000000000..9c6a03ef2 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/percent_magnitude_of_corrections.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/percent_max_correction.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/percent_max_correction.png new file mode 100644 index 000000000..0f13fb8d4 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/percent_max_correction.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/rate_of_change.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/rate_of_change.png new file mode 100644 index 000000000..8c69c5985 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/rate_of_change.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/reward.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/reward.png new file mode 100644 index 000000000..62de9c4ac Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/reward.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/rmse.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/rmse.png new file mode 100644 index 000000000..f5bdc158e Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/rmse.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat__ep_constraint_violation.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat__ep_constraint_violation.png new file mode 100644 index 000000000..919b3abf9 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat__ep_constraint_violation.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat__ep_length.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat__ep_length.png new file mode 100644 index 000000000..925057621 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat__ep_length.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat__ep_return.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat__ep_return.png new file mode 100644 index 000000000..48ae223be Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat__ep_return.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat__ep_reward.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat__ep_reward.png new file mode 100644 index 000000000..b5348b443 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat__ep_reward.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat_eval__constraint_violation.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat_eval__constraint_violation.png new file mode 100644 index 000000000..fb4e93320 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat_eval__constraint_violation.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat_eval__ep_length.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat_eval__ep_length.png new file mode 100644 index 000000000..d765a4afa Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat_eval__ep_length.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat_eval__ep_return.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat_eval__ep_return.png new file mode 100644 index 000000000..7d0e93843 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat_eval__ep_return.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat_eval__ep_reward.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat_eval__ep_reward.png new file mode 100644 index 000000000..60579535f Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat_eval__ep_reward.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat_eval__mse.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat_eval__mse.png new file mode 100644 index 000000000..73d7360ea Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t2/stat_eval__mse.png differ diff --git a/experiments/crazyflie/results_cf/ppo/mpsf_0.1_dm_t2.pkl b/experiments/crazyflie/results_cf/ppo/mpsf_0.1_dm_t2.pkl new file mode 100644 index 000000000..9c2845c17 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/mpsf_0.1_dm_t2.pkl differ diff --git a/experiments/crazyflie/results_cf/ppo/mpsf_10_dm_t2.pkl b/experiments/crazyflie/results_cf/ppo/mpsf_10_dm_t2.pkl new file mode 100644 index 000000000..6a90fb9a7 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/mpsf_10_dm_t2.pkl differ diff --git a/experiments/crazyflie/results_cf/ppo/mpsf_1_dm_t2.pkl b/experiments/crazyflie/results_cf/ppo/mpsf_1_dm_t2.pkl new file mode 100644 index 000000000..8a5dcab04 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/mpsf_1_dm_t2.pkl differ diff --git a/experiments/crazyflie/results_cf/ppo/none_cpen_dm_t2.pkl b/experiments/crazyflie/results_cf/ppo/none_cpen_dm_t2.pkl new file mode 100644 index 000000000..2ebea7ad0 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/none_cpen_dm_t2.pkl differ diff --git a/experiments/crazyflie/results_cf/ppo/none_dm_t2.pkl b/experiments/crazyflie/results_cf/ppo/none_dm_t2.pkl new file mode 100644 index 000000000..ad0196079 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/none_dm_t2.pkl differ diff --git a/experiments/crazyflie/test_crazyflie.sh b/experiments/crazyflie/test_crazyflie.sh index da6b621b6..e9236b24a 100755 --- a/experiments/crazyflie/test_crazyflie.sh +++ b/experiments/crazyflie/test_crazyflie.sh @@ -48,7 +48,7 @@ else SF_PEN_TAG="_$4" fi -TAG="$1${CONSTR_PEN_TAG}${SF_PEN_TAG}_dm3" +TAG="$1${CONSTR_PEN_TAG}${SF_PEN_TAG}_dm_t2" echo $TAG $SYS $ALGO $TASK python3 ./train_rl.py \ diff --git a/safe_control_gym/controllers/ppo/ppo.py b/safe_control_gym/controllers/ppo/ppo.py index 0cd89ca66..57ba3ba0b 100644 --- a/safe_control_gym/controllers/ppo/ppo.py +++ b/safe_control_gym/controllers/ppo/ppo.py @@ -372,7 +372,8 @@ def train_step(self): def get_reward(self, obs, info): wp_idx = min(info['current_step']//20, self.X_GOAL.shape[0] - 1) # +1 because state has already advanced but counter not incremented. state_error = obs[:4] - self.X_GOAL[wp_idx] - dist = np.sum(np.array([2, 0, 2, 0]) * state_error * state_error) + dist = np.sum(np.array([5, 0, 5, 0]) * state_error * state_error) + dist += np.sum(obs[[1,3,4,5]]*obs[[1,3,4,5]]) rew = -dist rew = np.exp(rew)