diff --git a/experiments/crazyflie/config_overrides/crazyflie_track.yaml b/experiments/crazyflie/config_overrides/crazyflie_track.yaml index 39f322dff..54093e3f0 100644 --- a/experiments/crazyflie/config_overrides/crazyflie_track.yaml +++ b/experiments/crazyflie/config_overrides/crazyflie_track.yaml @@ -46,7 +46,7 @@ task_config: init_state_randomization_info: init_x: distrib: 'uniform' - low: -0.95 + low: -0.5 high: 0.95 init_x_dot: distrib: 'uniform' @@ -121,7 +121,7 @@ task_config: - 0.25 - 0.25 lower_bounds: - - -0.95 + - -0.5 - -2 - -0.95 - -2 diff --git a/experiments/crazyflie/crazyflie_controller.py b/experiments/crazyflie/crazyflie_controller.py index 1388e7d1e..c16362d44 100644 --- a/experiments/crazyflie/crazyflie_controller.py +++ b/experiments/crazyflie/crazyflie_controller.py @@ -92,7 +92,7 @@ 'constrained_variable': 'state', 'active_dims': [0,1,2,3,6,7], 'upper_bounds': [0.95, 2, 0.95, 2, 0.25, 0.25], - 'lower_bounds': [-0.95, -2, -0.95, -2, -0.25, -0.25]}, + 'lower_bounds': [-0.5, -2, -0.95, -2, -0.25, -0.25]}, {'constraint_form': 'default_constraint', 'constrained_variable': 'input', } diff --git a/experiments/crazyflie/crazyflie_utils.py b/experiments/crazyflie/crazyflie_utils.py index d8131ca61..f6266b815 100644 --- a/experiments/crazyflie/crazyflie_utils.py +++ b/experiments/crazyflie/crazyflie_utils.py @@ -47,7 +47,7 @@ def calc_error(CTRL_FREQ, TEST=0, CERTIFIED=False, MODEL='none'): reward = np.sum(np.exp(-dist)) print('Model Errors: ', np.linalg.norm(errors)) - print('NUM ERRORS POS: ', np.sum(np.abs(states[:, [0,2]]) >= 0.95)) + print('NUM ERRORS POS: ', np.sum(states[:, [0,2]] >= 0.95) + np.sum(states[:, [0,2]] <= [-0.5, -0.95])) print('NUM ERRORS VEL: ', np.sum(np.abs(states[:, [1,3]]) >= 2)) print('NUM ERRORS ANGLE: ', np.sum(np.abs(states[:, [6,7]]) >= 0.25)) print('Rate of change (inputs): ', np.linalg.norm(get_discrete_derivative(actions.reshape(-1, 1), CTRL_FREQ))) diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/config.yaml b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/config.yaml new file mode 100644 index 000000000..1b9217764 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/config.yaml @@ -0,0 +1,246 @@ +algo: ppo +algo_config: + activation: tanh + actor_lr: 0.001 + clip_obs: 10 + clip_param: 0.2 + clip_reward: 10 + critic_lr: 0.001 + deque_size: 10 + entropy_coef: 0.01 + eval_batch_size: 10 + eval_interval: 12500 + eval_save_best: true + filter_train_actions: true + gae_lambda: 0.95 + gamma: 0.99 + hidden_dim: 128 + log_interval: 12500 + max_env_steps: 250000 + max_grad_norm: 0.5 + mini_batch_size: 256 + norm_obs: false + norm_reward: false + num_checkpoints: 0 + num_workers: 1 + opt_epochs: 20 + penalize_sf_diff: true + rollout_batch_size: 1 + rollout_steps: 1000 + save_interval: 0 + sf_penalty: 0.1 + target_kl: 0.01 + tensorboard: false + training: true + use_clipped_value: false + use_gae: true + use_safe_reset: true +device: cpu +kv_overrides: +- sf_config.cost_function=one_step_cost +- algo_config.filter_train_actions=True +- algo_config.penalize_sf_diff=True +- algo_config.use_safe_reset=True +- algo_config.sf_penalty=0.1 +- task_config.use_constraint_penalty=False +output_dir: ./models/rl_models/ppo/mpsf_0.1_dm_t1 +overrides: +- ./config_overrides/crazyflie_track.yaml +- ./config_overrides/ppo_crazyflie.yaml +- ./config_overrides/nl_mpsc.yaml +restore: null +safety_filter: nl_mpsc +seed: null +sf_config: + cost_function: one_step_cost + decay_factor: 0.85 + horizon: 10 + integration_algo: rk4 + mpsc_cost_horizon: 5 + n_samples: 600 + prior_info: + prior_prop: null + prior_prop_rand_info: null + randomize_prior_prop: false + q_lin: + - 0.008 + - 1.85 + - 0.008 + - 1.85 + - 10 + - 10 + r_lin: + - 2 + use_acados: true + use_terminal_set: false + warmstart: true +tag: temp +task: quadrotor +task_config: + adversary_disturbance: null + adversary_disturbance_offset: 0.0 + adversary_disturbance_scale: 0.01 + camera_view: + - 5 + - -40 + - -40 + - 0.5 + - -1 + - 0.5 + constraint_penalty: -1 + constraints: + - active_dims: + - 0 + - 1 + - 2 + - 3 + - 6 + - 7 + constrained_variable: state + constraint_form: bounded_constraint + lower_bounds: + - -0.5 + - -2 + - -0.95 + - -2 + - -0.25 + - -0.25 + upper_bounds: + - 0.95 + - 2 + - 0.95 + - 2 + - 0.25 + - 0.25 + - constrained_variable: input + constraint_form: default_constraint + cost: quadratic + ctrl_freq: 500 + disturbances: + dynamics: + - disturbance_func: white_noise + std: 0.2 + observation: + - disturbance_func: white_noise + std: 0.002 + done_on_out_of_bound: true + done_on_violation: false + episode_len_sec: 15 + gui: false + inertial_prop: + Ixx: 1.4e-05 + Iyy: 1.4e-05 + Izz: 2.17e-05 + M: 0.0345 + inertial_prop_randomization_info: + Ixx: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Iyy: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Izz: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + M: + distrib: uniform + high: 0.0025 + low: -0.0025 + info_in_reset: true + init_state: + init_p: 0 + init_phi: 0 + init_psi: 0 + init_q: 0 + init_r: 0 + init_theta: 0 + init_x: 0 + init_x_dot: 0 + init_y: 0 + init_y_dot: 0 + init_z: 1 + init_z_dot: 0 + init_state_randomization_info: + init_p: + distrib: uniform + high: 0.5 + low: -0.5 + init_phi: + distrib: uniform + high: 0.25 + low: -0.25 + init_psi: + distrib: uniform + high: 0 + low: 0 + init_q: + distrib: uniform + high: 0.5 + low: -0.5 + init_r: + distrib: uniform + high: 0 + low: 0 + init_theta: + distrib: uniform + high: 0.25 + low: -0.25 + init_x: + distrib: uniform + high: 0.95 + low: -0.5 + init_x_dot: + distrib: uniform + high: 2 + low: -2 + init_y: + distrib: uniform + high: 0.95 + low: -0.95 + init_y_dot: + distrib: uniform + high: 2 + low: -2 + init_z: + distrib: uniform + high: 1 + low: 1 + init_z_dot: + distrib: uniform + high: 0 + low: 0 + norm_act_scale: 0.1 + normalized_rl_action_space: false + obs_goal_horizon: 0 + physics: pyb + pyb_freq: 1000 + quad_type: 3 + randomized_inertial_prop: true + randomized_init: false + rew_act_weight: 0.0001 + rew_exponential: true + rew_state_weight: 1.0 + seed: 1337 + task: traj_tracking + task_info: + num_cycles: 1 + proj_normal: + - 0 + - 1 + - 1 + proj_point: + - 0 + - 0 + - 0.5 + trajectory_plane: xz + trajectory_position_offset: + - 0 + - 1 + trajectory_scale: 1 + trajectory_type: figure8 + use_constraint_penalty: false + verbose: false +use_gpu: false diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/loss/approx_kl.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/loss/approx_kl.log new file mode 100644 index 000000000..0fe722a85 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/loss/approx_kl.log @@ -0,0 +1,11 @@ +step,loss/approx_kl +25000,0.015373280962618687 +50000,0.005466316675301641 +75000,0.007334801884523281 +100000,0.008855676196981221 +125000,0.007131252387383333 +150000,0.016024184126096467 +175000,0.02016867712372914 +200000,0.014259486020697903 +225000,0.027465859936395037 +250000,0.018274522332164148 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/loss/entropy_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/loss/entropy_loss.log new file mode 100644 index 000000000..5fb43d7ac --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/loss/entropy_loss.log @@ -0,0 +1,11 @@ +step,loss/entropy_loss +25000,-1.9422575136025746 +50000,-1.9314473847548168 +75000,-1.718100424607595 +100000,-1.6027250786622367 +125000,-1.4119963268438975 +150000,-1.5207090298334758 +175000,-1.4677082558472954 +200000,-1.476611814896266 +225000,-1.4850205163160959 +250000,-1.5199606776237486 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/loss/policy_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/loss/policy_loss.log new file mode 100644 index 000000000..27b26ff24 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/loss/policy_loss.log @@ -0,0 +1,11 @@ +step,loss/policy_loss +25000,-0.008487398050761605 +50000,-0.00880940280480252 +75000,-0.009933911955918398 +100000,-0.005607683146801758 +125000,-0.004757624400151668 +150000,-0.003386248358056189 +175000,-0.002005064096720082 +200000,-0.005854441330401723 +225000,0.004003693091684401 +250000,-0.010062139415958049 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/loss/value_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/loss/value_loss.log new file mode 100644 index 000000000..dee9feb50 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/loss/value_loss.log @@ -0,0 +1,11 @@ +step,loss/value_loss +25000,7.793937429077428 +50000,13.893374047401428 +75000,23.89295015323139 +100000,19.281110565347028 +125000,27.118762816218055 +150000,12.193586144909709 +175000,18.288683568642096 +200000,11.609428511226813 +225000,5.738791902706543 +250000,4.967841796357956 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat/ep_constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat/ep_constraint_violation.log new file mode 100644 index 000000000..bbf527cd4 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat/ep_constraint_violation.log @@ -0,0 +1,11 @@ +step,stat/ep_constraint_violation +25000,309.5 +50000,126.5 +75000,100.5 +100000,105.5 +125000,219.0 +150000,31.5 +175000,34.5 +200000,31.5 +225000,18.5 +250000,12.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat/ep_length.log new file mode 100644 index 000000000..1bbfbe673 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat/ep_length.log @@ -0,0 +1,11 @@ +step,stat/ep_length +25000,375.0 +50000,375.0 +75000,375.0 +100000,375.0 +125000,375.0 +150000,375.0 +175000,375.0 +200000,375.0 +225000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat/ep_return.log new file mode 100644 index 000000000..316e1a030 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat/ep_return.log @@ -0,0 +1,11 @@ +step,stat/ep_return +25000,98.29296866306379 +50000,204.49077267324304 +75000,231.51700314647044 +100000,244.08289157053508 +125000,204.9318586508445 +150000,267.52341422784 +175000,279.615252885838 +200000,282.54841177443075 +225000,291.78329345184795 +250000,277.8975227464764 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat/ep_reward.log new file mode 100644 index 000000000..794139854 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat/ep_reward.log @@ -0,0 +1,11 @@ +step,stat/ep_reward +25000,0.26211458310150343 +50000,0.5453087271286481 +75000,0.6173786750572545 +100000,0.6508877108547602 +125000,0.546484956402252 +150000,0.71339577127424 +175000,0.7456406743622347 +200000,0.7534624313984821 +225000,0.7780887825382612 +250000,0.7410600606572704 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat_eval/constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat_eval/constraint_violation.log new file mode 100644 index 000000000..98d1ea4d6 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat_eval/constraint_violation.log @@ -0,0 +1,11 @@ +step,stat_eval/constraint_violation +25000,313.8 +50000,149.9 +75000,77.5 +100000,49.6 +125000,81.0 +150000,54.2 +175000,14.0 +200000,20.4 +225000,6.3 +250000,8.9 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat_eval/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat_eval/ep_length.log new file mode 100644 index 000000000..77156d7a8 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat_eval/ep_length.log @@ -0,0 +1,11 @@ +step,stat_eval/ep_length +25000,375.0 +50000,375.0 +75000,347.4 +100000,375.0 +125000,375.0 +150000,375.0 +175000,375.0 +200000,375.0 +225000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat_eval/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat_eval/ep_return.log new file mode 100644 index 000000000..2692eab7a --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat_eval/ep_return.log @@ -0,0 +1,11 @@ +step,stat_eval/ep_return +25000,88.62866852515265 +50000,170.36628653538293 +75000,220.67695423872442 +100000,259.69489981783687 +125000,257.9325823567933 +150000,268.0747350960819 +175000,284.6449566612613 +200000,291.41684390934665 +225000,292.2902783561045 +250000,284.50350354738134 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat_eval/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat_eval/ep_reward.log new file mode 100644 index 000000000..c2f57bb64 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat_eval/ep_reward.log @@ -0,0 +1,11 @@ +step,stat_eval/ep_reward +25000,0.23634311606707376 +50000,0.45431009742768785 +75000,0.6312424530879946 +100000,0.6925197328475649 +125000,0.6878202196181156 +150000,0.7148659602562184 +175000,0.7590532177633634 +200000,0.7771115837582576 +225000,0.7794407422829452 +250000,0.7586760094596836 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat_eval/mse.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat_eval/mse.log new file mode 100644 index 000000000..9beb3f9f4 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/logs/stat_eval/mse.log @@ -0,0 +1,11 @@ +step,stat_eval/mse +25000,5.8508605567178344 +50000,2.286126506630821 +75000,1.1331641861949528 +100000,0.9233659072164485 +125000,1.0836500874481039 +150000,0.8747244595360524 +175000,0.5327289953335879 +200000,0.4728552796842651 +225000,0.4592665668693261 +250000,0.494120142345116 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/model_best.pt b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/model_best.pt new file mode 100644 index 000000000..7217177dd Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/model_best.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/model_latest.pt b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/model_latest.pt new file mode 100644 index 000000000..763410616 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/model_latest.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-loss-approx_kl.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-loss-approx_kl.jpg new file mode 100644 index 000000000..b99cb5e54 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-loss-approx_kl.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-loss-entropy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-loss-entropy_loss.jpg new file mode 100644 index 000000000..4597d17cc Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-loss-policy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-loss-policy_loss.jpg new file mode 100644 index 000000000..6f928ba7a Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-loss-policy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-loss-value_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-loss-value_loss.jpg new file mode 100644 index 000000000..9fd4335ab Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-loss-value_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat-ep_constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat-ep_constraint_violation.jpg new file mode 100644 index 000000000..952273f3c Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat-ep_length.jpg new file mode 100644 index 000000000..1620e1542 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat-ep_return.jpg new file mode 100644 index 000000000..fbcf2c468 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat-ep_reward.jpg new file mode 100644 index 000000000..aee2b0319 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat_eval-constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat_eval-constraint_violation.jpg new file mode 100644 index 000000000..925bb888c Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat_eval-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat_eval-ep_length.jpg new file mode 100644 index 000000000..dbbede633 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat_eval-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat_eval-ep_return.jpg new file mode 100644 index 000000000..13543d6f3 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat_eval-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat_eval-ep_reward.jpg new file mode 100644 index 000000000..4a87737bd Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat_eval-mse.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat_eval-mse.jpg new file mode 100644 index 000000000..2c9e7e0d0 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_0.1_dm_t1/plots/-stat_eval-mse.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/config.yaml b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/config.yaml new file mode 100644 index 000000000..72402564a --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/config.yaml @@ -0,0 +1,246 @@ +algo: ppo +algo_config: + activation: tanh + actor_lr: 0.001 + clip_obs: 10 + clip_param: 0.2 + clip_reward: 10 + critic_lr: 0.001 + deque_size: 10 + entropy_coef: 0.01 + eval_batch_size: 10 + eval_interval: 12500 + eval_save_best: true + filter_train_actions: true + gae_lambda: 0.95 + gamma: 0.99 + hidden_dim: 128 + log_interval: 12500 + max_env_steps: 250000 + max_grad_norm: 0.5 + mini_batch_size: 256 + norm_obs: false + norm_reward: false + num_checkpoints: 0 + num_workers: 1 + opt_epochs: 20 + penalize_sf_diff: true + rollout_batch_size: 1 + rollout_steps: 1000 + save_interval: 0 + sf_penalty: 10 + target_kl: 0.01 + tensorboard: false + training: true + use_clipped_value: false + use_gae: true + use_safe_reset: true +device: cpu +kv_overrides: +- sf_config.cost_function=one_step_cost +- algo_config.filter_train_actions=True +- algo_config.penalize_sf_diff=True +- algo_config.use_safe_reset=True +- algo_config.sf_penalty=10 +- task_config.use_constraint_penalty=False +output_dir: ./models/rl_models/ppo/mpsf_10_dm_t1 +overrides: +- ./config_overrides/crazyflie_track.yaml +- ./config_overrides/ppo_crazyflie.yaml +- ./config_overrides/nl_mpsc.yaml +restore: null +safety_filter: nl_mpsc +seed: null +sf_config: + cost_function: one_step_cost + decay_factor: 0.85 + horizon: 10 + integration_algo: rk4 + mpsc_cost_horizon: 5 + n_samples: 600 + prior_info: + prior_prop: null + prior_prop_rand_info: null + randomize_prior_prop: false + q_lin: + - 0.008 + - 1.85 + - 0.008 + - 1.85 + - 10 + - 10 + r_lin: + - 2 + use_acados: true + use_terminal_set: false + warmstart: true +tag: temp +task: quadrotor +task_config: + adversary_disturbance: null + adversary_disturbance_offset: 0.0 + adversary_disturbance_scale: 0.01 + camera_view: + - 5 + - -40 + - -40 + - 0.5 + - -1 + - 0.5 + constraint_penalty: -1 + constraints: + - active_dims: + - 0 + - 1 + - 2 + - 3 + - 6 + - 7 + constrained_variable: state + constraint_form: bounded_constraint + lower_bounds: + - -0.5 + - -2 + - -0.95 + - -2 + - -0.25 + - -0.25 + upper_bounds: + - 0.95 + - 2 + - 0.95 + - 2 + - 0.25 + - 0.25 + - constrained_variable: input + constraint_form: default_constraint + cost: quadratic + ctrl_freq: 500 + disturbances: + dynamics: + - disturbance_func: white_noise + std: 0.2 + observation: + - disturbance_func: white_noise + std: 0.002 + done_on_out_of_bound: true + done_on_violation: false + episode_len_sec: 15 + gui: false + inertial_prop: + Ixx: 1.4e-05 + Iyy: 1.4e-05 + Izz: 2.17e-05 + M: 0.0345 + inertial_prop_randomization_info: + Ixx: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Iyy: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Izz: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + M: + distrib: uniform + high: 0.0025 + low: -0.0025 + info_in_reset: true + init_state: + init_p: 0 + init_phi: 0 + init_psi: 0 + init_q: 0 + init_r: 0 + init_theta: 0 + init_x: 0 + init_x_dot: 0 + init_y: 0 + init_y_dot: 0 + init_z: 1 + init_z_dot: 0 + init_state_randomization_info: + init_p: + distrib: uniform + high: 0.5 + low: -0.5 + init_phi: + distrib: uniform + high: 0.25 + low: -0.25 + init_psi: + distrib: uniform + high: 0 + low: 0 + init_q: + distrib: uniform + high: 0.5 + low: -0.5 + init_r: + distrib: uniform + high: 0 + low: 0 + init_theta: + distrib: uniform + high: 0.25 + low: -0.25 + init_x: + distrib: uniform + high: 0.95 + low: -0.5 + init_x_dot: + distrib: uniform + high: 2 + low: -2 + init_y: + distrib: uniform + high: 0.95 + low: -0.95 + init_y_dot: + distrib: uniform + high: 2 + low: -2 + init_z: + distrib: uniform + high: 1 + low: 1 + init_z_dot: + distrib: uniform + high: 0 + low: 0 + norm_act_scale: 0.1 + normalized_rl_action_space: false + obs_goal_horizon: 0 + physics: pyb + pyb_freq: 1000 + quad_type: 3 + randomized_inertial_prop: true + randomized_init: false + rew_act_weight: 0.0001 + rew_exponential: true + rew_state_weight: 1.0 + seed: 1337 + task: traj_tracking + task_info: + num_cycles: 1 + proj_normal: + - 0 + - 1 + - 1 + proj_point: + - 0 + - 0 + - 0.5 + trajectory_plane: xz + trajectory_position_offset: + - 0 + - 1 + trajectory_scale: 1 + trajectory_type: figure8 + use_constraint_penalty: false + verbose: false +use_gpu: false diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/loss/approx_kl.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/loss/approx_kl.log new file mode 100644 index 000000000..6ddf8b4d8 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/loss/approx_kl.log @@ -0,0 +1,11 @@ +step,loss/approx_kl +25000,0.018455955494816108 +50000,0.010702723303499318 +75000,0.009607930706503492 +100000,0.016764563655791185 +125000,0.023770159618773806 +150000,0.01788063358593111 +175000,0.014562522460861751 +200000,0.015011066287600744 +225000,0.012960871689332026 +250000,0.029812676735067118 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/loss/entropy_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/loss/entropy_loss.log new file mode 100644 index 000000000..79237e9c0 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/loss/entropy_loss.log @@ -0,0 +1,11 @@ +step,loss/entropy_loss +25000,-1.8862766345342 +50000,-1.700019065539042 +75000,-1.547554886341095 +100000,-1.5677641173203787 +125000,-1.4856210490067803 +150000,-1.5585129221280416 +175000,-1.4193623582522075 +200000,-1.3969172139962516 +225000,-1.3315460066000622 +250000,-1.406306936343511 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/loss/policy_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/loss/policy_loss.log new file mode 100644 index 000000000..be13e860a --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/loss/policy_loss.log @@ -0,0 +1,11 @@ +step,loss/policy_loss +25000,-0.0003983289444551634 +50000,-0.005687190252499296 +75000,-0.011786269980547055 +100000,-0.007212558879763106 +125000,0.005760900871984058 +150000,-0.0009547383959019098 +175000,-0.005646775855873617 +200000,-0.009674041073034998 +225000,-0.006358966928264517 +250000,-0.00879903417567245 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/loss/value_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/loss/value_loss.log new file mode 100644 index 000000000..d7be0e2dc --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/loss/value_loss.log @@ -0,0 +1,11 @@ +step,loss/value_loss +25000,18.781559015456654 +50000,16.969693520074465 +75000,3.691525782888695 +100000,41.5119013783197 +125000,2.525071512817414 +150000,61.357619626613804 +175000,13.071331205089013 +200000,3.741153871666198 +225000,9.807165359453418 +250000,45.26616429580396 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat/ep_constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat/ep_constraint_violation.log new file mode 100644 index 000000000..9b869dda1 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat/ep_constraint_violation.log @@ -0,0 +1,11 @@ +step,stat/ep_constraint_violation +25000,138.5 +50000,17.5 +75000,5.0 +100000,104.5 +125000,8.5 +150000,100.5 +175000,15.0 +200000,2.0 +225000,14.0 +250000,72.5 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat/ep_length.log new file mode 100644 index 000000000..1bbfbe673 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat/ep_length.log @@ -0,0 +1,11 @@ +step,stat/ep_length +25000,375.0 +50000,375.0 +75000,375.0 +100000,375.0 +125000,375.0 +150000,375.0 +175000,375.0 +200000,375.0 +225000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat/ep_return.log new file mode 100644 index 000000000..a6b2e128f --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat/ep_return.log @@ -0,0 +1,11 @@ +step,stat/ep_return +25000,136.4411251321208 +50000,206.53907301345117 +75000,255.04723786262394 +100000,212.10884509244113 +125000,264.5990547435817 +150000,209.62190268584723 +175000,241.75299407766937 +200000,267.55077501795745 +225000,273.4446411632766 +250000,232.21238540519658 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat/ep_reward.log new file mode 100644 index 000000000..98802a839 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat/ep_reward.log @@ -0,0 +1,11 @@ +step,stat/ep_reward +25000,0.3638430003523221 +50000,0.5507708613692032 +75000,0.6801259676336637 +100000,0.5656235869131764 +125000,0.7055974793162179 +150000,0.5589917404955926 +175000,0.6446746508737851 +200000,0.71346873338122 +225000,0.7291857097687375 +250000,0.6192330277471909 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat_eval/constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat_eval/constraint_violation.log new file mode 100644 index 000000000..7417ea3f9 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat_eval/constraint_violation.log @@ -0,0 +1,11 @@ +step,stat_eval/constraint_violation +25000,40.1 +50000,1.0 +75000,11.1 +100000,31.9 +125000,33.0 +150000,3.4 +175000,5.9 +200000,2.3 +225000,1.7 +250000,1.2 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat_eval/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat_eval/ep_length.log new file mode 100644 index 000000000..ea24cb6ad --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat_eval/ep_length.log @@ -0,0 +1,11 @@ +step,stat_eval/ep_length +25000,375.0 +50000,375.0 +75000,375.0 +100000,375.0 +125000,375.0 +150000,375.0 +175000,375.0 +200000,375.0 +225000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat_eval/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat_eval/ep_return.log new file mode 100644 index 000000000..5b8e4ff25 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat_eval/ep_return.log @@ -0,0 +1,11 @@ +step,stat_eval/ep_return +25000,221.35431607683535 +50000,245.96824300568028 +75000,258.14614119061395 +100000,242.50327768040387 +125000,250.4586056616162 +150000,277.73656985536076 +175000,279.929935207192 +200000,270.3616463549882 +225000,286.48173416729423 +250000,273.81972810875294 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat_eval/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat_eval/ep_reward.log new file mode 100644 index 000000000..d9148d80e --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat_eval/ep_reward.log @@ -0,0 +1,11 @@ +step,stat_eval/ep_reward +25000,0.5902781762048943 +50000,0.6559153146818142 +75000,0.6883897098416372 +100000,0.6466754071477437 +125000,0.6678896150976431 +150000,0.7406308529476286 +175000,0.7464798272191787 +200000,0.7209643902799684 +225000,0.7639512911127845 +250000,0.730185941623341 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat_eval/mse.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat_eval/mse.log new file mode 100644 index 000000000..72135440e --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/logs/stat_eval/mse.log @@ -0,0 +1,11 @@ +step,stat_eval/mse +25000,1.193763703143639 +50000,0.7351824950278791 +75000,0.7011095181348395 +100000,0.9689334091823717 +125000,0.9352880868845561 +150000,0.5477449047355255 +175000,0.49887196672680806 +200000,0.612611785455265 +225000,0.5065651343096008 +250000,0.5814251630618006 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/model_best.pt b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/model_best.pt new file mode 100644 index 000000000..6224315e4 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/model_best.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/model_latest.pt b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/model_latest.pt new file mode 100644 index 000000000..b92547e8d Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/model_latest.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-loss-approx_kl.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-loss-approx_kl.jpg new file mode 100644 index 000000000..893ee11e3 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-loss-approx_kl.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-loss-entropy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-loss-entropy_loss.jpg new file mode 100644 index 000000000..b66ad71f3 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-loss-policy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-loss-policy_loss.jpg new file mode 100644 index 000000000..4c0ad2919 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-loss-policy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-loss-value_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-loss-value_loss.jpg new file mode 100644 index 000000000..dd77b03e9 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-loss-value_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat-ep_constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat-ep_constraint_violation.jpg new file mode 100644 index 000000000..09264e981 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat-ep_length.jpg new file mode 100644 index 000000000..1620e1542 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat-ep_return.jpg new file mode 100644 index 000000000..13e5cb241 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat-ep_reward.jpg new file mode 100644 index 000000000..78ec540b2 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat_eval-constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat_eval-constraint_violation.jpg new file mode 100644 index 000000000..139988f72 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat_eval-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat_eval-ep_length.jpg new file mode 100644 index 000000000..deb409bef Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat_eval-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat_eval-ep_return.jpg new file mode 100644 index 000000000..bc548d1fc Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat_eval-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat_eval-ep_reward.jpg new file mode 100644 index 000000000..ce16d2664 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat_eval-mse.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat_eval-mse.jpg new file mode 100644 index 000000000..2feba56d8 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_10_dm_t1/plots/-stat_eval-mse.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/config.yaml b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/config.yaml new file mode 100644 index 000000000..3f03630f1 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/config.yaml @@ -0,0 +1,246 @@ +algo: ppo +algo_config: + activation: tanh + actor_lr: 0.001 + clip_obs: 10 + clip_param: 0.2 + clip_reward: 10 + critic_lr: 0.001 + deque_size: 10 + entropy_coef: 0.01 + eval_batch_size: 10 + eval_interval: 12500 + eval_save_best: true + filter_train_actions: true + gae_lambda: 0.95 + gamma: 0.99 + hidden_dim: 128 + log_interval: 12500 + max_env_steps: 250000 + max_grad_norm: 0.5 + mini_batch_size: 256 + norm_obs: false + norm_reward: false + num_checkpoints: 0 + num_workers: 1 + opt_epochs: 20 + penalize_sf_diff: true + rollout_batch_size: 1 + rollout_steps: 1000 + save_interval: 0 + sf_penalty: 1 + target_kl: 0.01 + tensorboard: false + training: true + use_clipped_value: false + use_gae: true + use_safe_reset: true +device: cpu +kv_overrides: +- sf_config.cost_function=one_step_cost +- algo_config.filter_train_actions=True +- algo_config.penalize_sf_diff=True +- algo_config.use_safe_reset=True +- algo_config.sf_penalty=1 +- task_config.use_constraint_penalty=False +output_dir: ./models/rl_models/ppo/mpsf_1_dm_t1 +overrides: +- ./config_overrides/crazyflie_track.yaml +- ./config_overrides/ppo_crazyflie.yaml +- ./config_overrides/nl_mpsc.yaml +restore: null +safety_filter: nl_mpsc +seed: null +sf_config: + cost_function: one_step_cost + decay_factor: 0.85 + horizon: 10 + integration_algo: rk4 + mpsc_cost_horizon: 5 + n_samples: 600 + prior_info: + prior_prop: null + prior_prop_rand_info: null + randomize_prior_prop: false + q_lin: + - 0.008 + - 1.85 + - 0.008 + - 1.85 + - 10 + - 10 + r_lin: + - 2 + use_acados: true + use_terminal_set: false + warmstart: true +tag: temp +task: quadrotor +task_config: + adversary_disturbance: null + adversary_disturbance_offset: 0.0 + adversary_disturbance_scale: 0.01 + camera_view: + - 5 + - -40 + - -40 + - 0.5 + - -1 + - 0.5 + constraint_penalty: -1 + constraints: + - active_dims: + - 0 + - 1 + - 2 + - 3 + - 6 + - 7 + constrained_variable: state + constraint_form: bounded_constraint + lower_bounds: + - -0.5 + - -2 + - -0.95 + - -2 + - -0.25 + - -0.25 + upper_bounds: + - 0.95 + - 2 + - 0.95 + - 2 + - 0.25 + - 0.25 + - constrained_variable: input + constraint_form: default_constraint + cost: quadratic + ctrl_freq: 500 + disturbances: + dynamics: + - disturbance_func: white_noise + std: 0.2 + observation: + - disturbance_func: white_noise + std: 0.002 + done_on_out_of_bound: true + done_on_violation: false + episode_len_sec: 15 + gui: false + inertial_prop: + Ixx: 1.4e-05 + Iyy: 1.4e-05 + Izz: 2.17e-05 + M: 0.0345 + inertial_prop_randomization_info: + Ixx: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Iyy: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Izz: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + M: + distrib: uniform + high: 0.0025 + low: -0.0025 + info_in_reset: true + init_state: + init_p: 0 + init_phi: 0 + init_psi: 0 + init_q: 0 + init_r: 0 + init_theta: 0 + init_x: 0 + init_x_dot: 0 + init_y: 0 + init_y_dot: 0 + init_z: 1 + init_z_dot: 0 + init_state_randomization_info: + init_p: + distrib: uniform + high: 0.5 + low: -0.5 + init_phi: + distrib: uniform + high: 0.25 + low: -0.25 + init_psi: + distrib: uniform + high: 0 + low: 0 + init_q: + distrib: uniform + high: 0.5 + low: -0.5 + init_r: + distrib: uniform + high: 0 + low: 0 + init_theta: + distrib: uniform + high: 0.25 + low: -0.25 + init_x: + distrib: uniform + high: 0.95 + low: -0.5 + init_x_dot: + distrib: uniform + high: 2 + low: -2 + init_y: + distrib: uniform + high: 0.95 + low: -0.95 + init_y_dot: + distrib: uniform + high: 2 + low: -2 + init_z: + distrib: uniform + high: 1 + low: 1 + init_z_dot: + distrib: uniform + high: 0 + low: 0 + norm_act_scale: 0.1 + normalized_rl_action_space: false + obs_goal_horizon: 0 + physics: pyb + pyb_freq: 1000 + quad_type: 3 + randomized_inertial_prop: true + randomized_init: false + rew_act_weight: 0.0001 + rew_exponential: true + rew_state_weight: 1.0 + seed: 1337 + task: traj_tracking + task_info: + num_cycles: 1 + proj_normal: + - 0 + - 1 + - 1 + proj_point: + - 0 + - 0 + - 0.5 + trajectory_plane: xz + trajectory_position_offset: + - 0 + - 1 + trajectory_scale: 1 + trajectory_type: figure8 + use_constraint_penalty: false + verbose: false +use_gpu: false diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/loss/approx_kl.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/loss/approx_kl.log new file mode 100644 index 000000000..933eb0a6f --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/loss/approx_kl.log @@ -0,0 +1,11 @@ +step,loss/approx_kl +25000,0.011983548710122704 +50000,0.007240420215142271 +75000,0.013987611990887674 +100000,0.033805271940461054 +125000,0.016684512450592593 +150000,0.010917089596235503 +175000,0.02851446665590629 +200000,0.008650578403224547 +225000,0.016166486723038057 +250000,0.021702582887761914 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/loss/entropy_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/loss/entropy_loss.log new file mode 100644 index 000000000..dae694350 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/loss/entropy_loss.log @@ -0,0 +1,11 @@ +step,loss/entropy_loss +25000,-1.7869488179683686 +50000,-1.680667605002721 +75000,-1.5505745828151705 +100000,-1.3706525981426239 +125000,-1.1320367793242136 +150000,-1.1427529215812684 +175000,-1.1319833258787793 +200000,-1.1064694563547768 +225000,-1.0391930480798086 +250000,-1.0682918508847554 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/loss/policy_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/loss/policy_loss.log new file mode 100644 index 000000000..d2eeb89bd --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/loss/policy_loss.log @@ -0,0 +1,11 @@ +step,loss/policy_loss +25000,-0.013726487474895317 +50000,-0.013086573736249541 +75000,-0.00738413951350229 +100000,-0.008427998431474924 +125000,-0.006981027493136322 +150000,0.0007158119752356124 +175000,-0.006722582078802305 +200000,-0.003312848141114063 +225000,-0.0010270837232175148 +250000,-0.00983703771807023 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/loss/value_loss.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/loss/value_loss.log new file mode 100644 index 000000000..2d0cb0a8c --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/loss/value_loss.log @@ -0,0 +1,11 @@ +step,loss/value_loss +25000,10.58578248122036 +50000,12.78539541745385 +75000,23.9124026766889 +100000,18.145729056773337 +125000,18.63314235427643 +150000,39.58260589876788 +175000,4.856384516298901 +200000,8.343428117667521 +225000,40.653343028452966 +250000,15.785309985483796 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat/ep_constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat/ep_constraint_violation.log new file mode 100644 index 000000000..cdc3cd138 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat/ep_constraint_violation.log @@ -0,0 +1,11 @@ +step,stat/ep_constraint_violation +25000,286.0 +50000,142.0 +75000,77.0 +100000,105.0 +125000,99.0 +150000,90.0 +175000,9.0 +200000,4.5 +225000,78.5 +250000,52.5 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat/ep_length.log new file mode 100644 index 000000000..1bbfbe673 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat/ep_length.log @@ -0,0 +1,11 @@ +step,stat/ep_length +25000,375.0 +50000,375.0 +75000,375.0 +100000,375.0 +125000,375.0 +150000,375.0 +175000,375.0 +200000,375.0 +225000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat/ep_return.log new file mode 100644 index 000000000..9510966cb --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat/ep_return.log @@ -0,0 +1,11 @@ +step,stat/ep_return +25000,109.1212095965052 +50000,174.85843003471328 +75000,202.9902935455577 +100000,201.98588251181098 +125000,215.14297059569998 +150000,231.71020203040953 +175000,273.9645669312584 +200000,285.111506568176 +225000,247.9917265559331 +250000,270.5603283210004 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat/ep_reward.log new file mode 100644 index 000000000..5b55f2076 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat/ep_reward.log @@ -0,0 +1,11 @@ +step,stat/ep_reward +25000,0.2909898922573472 +50000,0.4662891467592354 +75000,0.5413074494548206 +100000,0.538629020031496 +125000,0.5737145882551999 +150000,0.617893872081092 +175000,0.7305721784833556 +200000,0.7602973508484694 +225000,0.6613112708158216 +250000,0.721494208856001 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat_eval/constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat_eval/constraint_violation.log new file mode 100644 index 000000000..cbdea813a --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat_eval/constraint_violation.log @@ -0,0 +1,11 @@ +step,stat_eval/constraint_violation +25000,262.3 +50000,246.4 +75000,11.1 +100000,0.7 +125000,62.6 +150000,2.1 +175000,3.7 +200000,8.7 +225000,8.8 +250000,10.3 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat_eval/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat_eval/ep_length.log new file mode 100644 index 000000000..ea24cb6ad --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat_eval/ep_length.log @@ -0,0 +1,11 @@ +step,stat_eval/ep_length +25000,375.0 +50000,375.0 +75000,375.0 +100000,375.0 +125000,375.0 +150000,375.0 +175000,375.0 +200000,375.0 +225000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat_eval/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat_eval/ep_return.log new file mode 100644 index 000000000..7252bfbe2 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat_eval/ep_return.log @@ -0,0 +1,11 @@ +step,stat_eval/ep_return +25000,135.37083450190653 +50000,182.8506913011274 +75000,246.0258200156938 +100000,231.90729922686705 +125000,250.7467574025457 +150000,274.7585809413638 +175000,287.36768515932215 +200000,287.19914299680715 +225000,289.5975057423429 +250000,292.45172956815225 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat_eval/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat_eval/ep_reward.log new file mode 100644 index 000000000..1f58191b0 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat_eval/ep_reward.log @@ -0,0 +1,11 @@ +step,stat_eval/ep_reward +25000,0.36098889200508405 +50000,0.4876018434696731 +75000,0.6560688533751835 +100000,0.6184194646049789 +125000,0.668658019740122 +150000,0.73268954917697 +175000,0.7663138270915257 +200000,0.765864381324819 +225000,0.7722600153129143 +250000,0.7798712788484061 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat_eval/mse.log b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat_eval/mse.log new file mode 100644 index 000000000..001362aa7 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/logs/stat_eval/mse.log @@ -0,0 +1,11 @@ +step,stat_eval/mse +25000,3.0776111495192775 +50000,2.3959459372867125 +75000,0.7426472740347801 +100000,0.930230419620376 +125000,1.1434161435520604 +150000,0.6204082413500871 +175000,0.5099004910258945 +200000,0.4862534111867839 +225000,0.496921537136918 +250000,0.459647974917013 diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/model_best.pt b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/model_best.pt new file mode 100644 index 000000000..2db16e64f Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/model_best.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/model_latest.pt b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/model_latest.pt new file mode 100644 index 000000000..dc50fb1a8 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/model_latest.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-loss-approx_kl.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-loss-approx_kl.jpg new file mode 100644 index 000000000..e222a91ec Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-loss-approx_kl.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-loss-entropy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-loss-entropy_loss.jpg new file mode 100644 index 000000000..4eb781d3d Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-loss-policy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-loss-policy_loss.jpg new file mode 100644 index 000000000..d4ee3c864 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-loss-policy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-loss-value_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-loss-value_loss.jpg new file mode 100644 index 000000000..bcd56675e Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-loss-value_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat-ep_constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat-ep_constraint_violation.jpg new file mode 100644 index 000000000..1a3b06fff Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat-ep_length.jpg new file mode 100644 index 000000000..1620e1542 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat-ep_return.jpg new file mode 100644 index 000000000..7743a7a41 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat-ep_reward.jpg new file mode 100644 index 000000000..2ed8c98c0 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat_eval-constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat_eval-constraint_violation.jpg new file mode 100644 index 000000000..49836f799 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat_eval-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat_eval-ep_length.jpg new file mode 100644 index 000000000..deb409bef Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat_eval-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat_eval-ep_return.jpg new file mode 100644 index 000000000..edd225dcb Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat_eval-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat_eval-ep_reward.jpg new file mode 100644 index 000000000..90e408a5a Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat_eval-mse.jpg b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat_eval-mse.jpg new file mode 100644 index 000000000..1b21c1892 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/mpsf_1_dm_t1/plots/-stat_eval-mse.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/config.yaml b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/config.yaml new file mode 100644 index 000000000..335bb10be --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/config.yaml @@ -0,0 +1,246 @@ +algo: ppo +algo_config: + activation: tanh + actor_lr: 0.001 + clip_obs: 10 + clip_param: 0.2 + clip_reward: 10 + critic_lr: 0.001 + deque_size: 10 + entropy_coef: 0.01 + eval_batch_size: 10 + eval_interval: 12500 + eval_save_best: true + filter_train_actions: false + gae_lambda: 0.95 + gamma: 0.99 + hidden_dim: 128 + log_interval: 12500 + max_env_steps: 250000 + max_grad_norm: 0.5 + mini_batch_size: 256 + norm_obs: false + norm_reward: false + num_checkpoints: 0 + num_workers: 1 + opt_epochs: 20 + penalize_sf_diff: false + rollout_batch_size: 1 + rollout_steps: 1000 + save_interval: 0 + sf_penalty: false + target_kl: 0.01 + tensorboard: false + training: true + use_clipped_value: false + use_gae: true + use_safe_reset: false +device: cpu +kv_overrides: +- sf_config.cost_function=one_step_cost +- algo_config.filter_train_actions=False +- algo_config.penalize_sf_diff=False +- algo_config.use_safe_reset=False +- algo_config.sf_penalty=False +- task_config.use_constraint_penalty=True +output_dir: ./models/rl_models/ppo/none_cpen_dm_t1 +overrides: +- ./config_overrides/crazyflie_track.yaml +- ./config_overrides/ppo_crazyflie.yaml +- ./config_overrides/nl_mpsc.yaml +restore: null +safety_filter: nl_mpsc +seed: null +sf_config: + cost_function: one_step_cost + decay_factor: 0.85 + horizon: 10 + integration_algo: rk4 + mpsc_cost_horizon: 5 + n_samples: 600 + prior_info: + prior_prop: null + prior_prop_rand_info: null + randomize_prior_prop: false + q_lin: + - 0.008 + - 1.85 + - 0.008 + - 1.85 + - 10 + - 10 + r_lin: + - 2 + use_acados: true + use_terminal_set: false + warmstart: true +tag: temp +task: quadrotor +task_config: + adversary_disturbance: null + adversary_disturbance_offset: 0.0 + adversary_disturbance_scale: 0.01 + camera_view: + - 5 + - -40 + - -40 + - 0.5 + - -1 + - 0.5 + constraint_penalty: -1 + constraints: + - active_dims: + - 0 + - 1 + - 2 + - 3 + - 6 + - 7 + constrained_variable: state + constraint_form: bounded_constraint + lower_bounds: + - -0.5 + - -2 + - -0.95 + - -2 + - -0.25 + - -0.25 + upper_bounds: + - 0.95 + - 2 + - 0.95 + - 2 + - 0.25 + - 0.25 + - constrained_variable: input + constraint_form: default_constraint + cost: quadratic + ctrl_freq: 500 + disturbances: + dynamics: + - disturbance_func: white_noise + std: 0.2 + observation: + - disturbance_func: white_noise + std: 0.002 + done_on_out_of_bound: true + done_on_violation: false + episode_len_sec: 15 + gui: false + inertial_prop: + Ixx: 1.4e-05 + Iyy: 1.4e-05 + Izz: 2.17e-05 + M: 0.0345 + inertial_prop_randomization_info: + Ixx: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Iyy: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Izz: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + M: + distrib: uniform + high: 0.0025 + low: -0.0025 + info_in_reset: true + init_state: + init_p: 0 + init_phi: 0 + init_psi: 0 + init_q: 0 + init_r: 0 + init_theta: 0 + init_x: 0 + init_x_dot: 0 + init_y: 0 + init_y_dot: 0 + init_z: 1 + init_z_dot: 0 + init_state_randomization_info: + init_p: + distrib: uniform + high: 0.5 + low: -0.5 + init_phi: + distrib: uniform + high: 0.25 + low: -0.25 + init_psi: + distrib: uniform + high: 0 + low: 0 + init_q: + distrib: uniform + high: 0.5 + low: -0.5 + init_r: + distrib: uniform + high: 0 + low: 0 + init_theta: + distrib: uniform + high: 0.25 + low: -0.25 + init_x: + distrib: uniform + high: 0.95 + low: -0.5 + init_x_dot: + distrib: uniform + high: 2 + low: -2 + init_y: + distrib: uniform + high: 0.95 + low: -0.95 + init_y_dot: + distrib: uniform + high: 2 + low: -2 + init_z: + distrib: uniform + high: 1 + low: 1 + init_z_dot: + distrib: uniform + high: 0 + low: 0 + norm_act_scale: 0.1 + normalized_rl_action_space: false + obs_goal_horizon: 0 + physics: pyb + pyb_freq: 1000 + quad_type: 3 + randomized_inertial_prop: true + randomized_init: false + rew_act_weight: 0.0001 + rew_exponential: true + rew_state_weight: 1.0 + seed: 1337 + task: traj_tracking + task_info: + num_cycles: 1 + proj_normal: + - 0 + - 1 + - 1 + proj_point: + - 0 + - 0 + - 0.5 + trajectory_plane: xz + trajectory_position_offset: + - 0 + - 1 + trajectory_scale: 1 + trajectory_type: figure8 + use_constraint_penalty: true + verbose: false +use_gpu: false diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/loss/approx_kl.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/loss/approx_kl.log new file mode 100644 index 000000000..010aa4ef3 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/loss/approx_kl.log @@ -0,0 +1,11 @@ +step,loss/approx_kl +25000,0.0528251670844232 +50000,0.01379673272604123 +75000,0.005932452044604968 +100000,0.029873112996574486 +125000,0.027339184385103483 +150000,0.020330765492205197 +175000,0.013811932036575547 +200000,0.012486077550177773 +225000,0.011740429771210377 +250000,0.025070898570508388 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/loss/entropy_loss.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/loss/entropy_loss.log new file mode 100644 index 000000000..aca44be08 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/loss/entropy_loss.log @@ -0,0 +1,11 @@ +step,loss/entropy_loss +25000,-1.723974245786667 +50000,-1.6247641265392303 +75000,-1.5847967545191448 +100000,-1.357618369658788 +125000,-1.2350152095158895 +150000,-1.1959191620349885 +175000,-1.0219345251719159 +200000,-0.7896973510583241 +225000,-0.781983016928037 +250000,-0.7180173615614572 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/loss/policy_loss.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/loss/policy_loss.log new file mode 100644 index 000000000..2aba6aa21 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/loss/policy_loss.log @@ -0,0 +1,11 @@ +step,loss/policy_loss +25000,0.00627194134441969 +50000,0.00048422138388939647 +75000,-0.0016717209433809587 +100000,-0.008298612360026342 +125000,-0.0043468337599645995 +150000,-0.01057483947044988 +175000,-0.0010427866119759295 +200000,-0.011587030425937284 +225000,-0.012253212057655177 +250000,0.010024889747345413 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/loss/value_loss.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/loss/value_loss.log new file mode 100644 index 000000000..5f6530711 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/loss/value_loss.log @@ -0,0 +1,11 @@ +step,loss/value_loss +25000,6.637456504579705 +50000,10.046740005392184 +75000,2.453911653732844 +100000,3.142254498212312 +125000,4.875860223182892 +150000,1.3604849557734837 +175000,4.544172112679773 +200000,3.5649717883648733 +225000,5.11402984080284 +250000,2.3362668023283275 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat/ep_constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat/ep_constraint_violation.log new file mode 100644 index 000000000..44881cb04 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat/ep_constraint_violation.log @@ -0,0 +1,11 @@ +step,stat/ep_constraint_violation +25000,40.857142857142854 +50000,176.66666666666666 +75000,21.0 +100000,24.5 +125000,77.5 +150000,22.0 +175000,37.5 +200000,24.0 +225000,16.0 +250000,23.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat/ep_length.log new file mode 100644 index 000000000..3383f92a2 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat/ep_length.log @@ -0,0 +1,11 @@ +step,stat/ep_length +25000,142.42857142857142 +50000,306.0 +75000,375.0 +100000,375.0 +125000,375.0 +150000,375.0 +175000,375.0 +200000,375.0 +225000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat/ep_return.log new file mode 100644 index 000000000..0a577e5f1 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat/ep_return.log @@ -0,0 +1,11 @@ +step,stat/ep_return +25000,71.52296467273064 +50000,134.7675544518858 +75000,221.73059944834858 +100000,270.67471042919624 +125000,299.4295222572512 +150000,305.379107885634 +175000,315.32437140455363 +200000,313.54398813061226 +225000,330.34333293734005 +250000,317.31820527339863 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat/ep_reward.log new file mode 100644 index 000000000..21ec5b65c --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat/ep_reward.log @@ -0,0 +1,11 @@ +step,stat/ep_reward +25000,0.5038958477533669 +50000,0.48218817974318 +75000,0.5912815985289295 +100000,0.72179922781119 +125000,0.7984787260193366 +150000,0.814344287695024 +175000,0.8408649904121429 +200000,0.8361173016816327 +225000,0.8809155544995735 +250000,0.8461818807290631 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat_eval/constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat_eval/constraint_violation.log new file mode 100644 index 000000000..247dcb7ac --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat_eval/constraint_violation.log @@ -0,0 +1,11 @@ +step,stat_eval/constraint_violation +25000,280.3 +50000,214.0 +75000,46.2 +100000,146.4 +125000,79.9 +150000,97.4 +175000,73.5 +200000,174.5 +225000,147.8 +250000,140.7 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat_eval/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat_eval/ep_length.log new file mode 100644 index 000000000..ea24cb6ad --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat_eval/ep_length.log @@ -0,0 +1,11 @@ +step,stat_eval/ep_length +25000,375.0 +50000,375.0 +75000,375.0 +100000,375.0 +125000,375.0 +150000,375.0 +175000,375.0 +200000,375.0 +225000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat_eval/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat_eval/ep_return.log new file mode 100644 index 000000000..9e43106fb --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat_eval/ep_return.log @@ -0,0 +1,11 @@ +step,stat_eval/ep_return +25000,147.50289010281853 +50000,139.84951602040758 +75000,197.5793831667882 +100000,201.33700723616684 +125000,259.50054029801856 +150000,254.66723197217954 +175000,267.44195257524507 +200000,222.25263736573166 +225000,228.43693340555492 +250000,234.82745510208787 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat_eval/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat_eval/ep_reward.log new file mode 100644 index 000000000..7d319d28a --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat_eval/ep_reward.log @@ -0,0 +1,11 @@ +step,stat_eval/ep_reward +25000,0.3933410402741827 +50000,0.3729320427210868 +75000,0.5268783551114351 +100000,0.5368986859631116 +125000,0.6920014407947164 +150000,0.6791126185924787 +175000,0.7131785402006535 +200000,0.5926736996419512 +225000,0.6091651557481464 +250000,0.626206546938901 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat_eval/mse.log b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat_eval/mse.log new file mode 100644 index 000000000..dec79fdbb --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/logs/stat_eval/mse.log @@ -0,0 +1,11 @@ +step,stat_eval/mse +25000,3.4165246707081955 +50000,4.121272951968232 +75000,1.911096609138512 +100000,1.931523351890403 +125000,0.8961145919428437 +150000,1.057307648626507 +175000,0.8830492377047976 +200000,1.6475340051722014 +225000,1.5524838275434667 +250000,1.3257558939365444 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/model_best.pt b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/model_best.pt new file mode 100644 index 000000000..ef6acd020 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/model_best.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/model_latest.pt b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/model_latest.pt new file mode 100644 index 000000000..a74da15f9 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/model_latest.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-loss-approx_kl.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-loss-approx_kl.jpg new file mode 100644 index 000000000..f5c098fe7 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-loss-approx_kl.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-loss-entropy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-loss-entropy_loss.jpg new file mode 100644 index 000000000..5fffde852 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-loss-policy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-loss-policy_loss.jpg new file mode 100644 index 000000000..5d75fc2f3 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-loss-policy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-loss-value_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-loss-value_loss.jpg new file mode 100644 index 000000000..3e50a7711 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-loss-value_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat-ep_constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat-ep_constraint_violation.jpg new file mode 100644 index 000000000..ae1498cbe Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat-ep_length.jpg new file mode 100644 index 000000000..82a1fe77a Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat-ep_return.jpg new file mode 100644 index 000000000..c2e32eea9 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat-ep_reward.jpg new file mode 100644 index 000000000..e671b1042 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat_eval-constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat_eval-constraint_violation.jpg new file mode 100644 index 000000000..4a02ac2ea Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat_eval-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat_eval-ep_length.jpg new file mode 100644 index 000000000..deb409bef Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat_eval-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat_eval-ep_return.jpg new file mode 100644 index 000000000..1fb4a4e9c Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat_eval-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat_eval-ep_reward.jpg new file mode 100644 index 000000000..f040fa452 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat_eval-mse.jpg b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat_eval-mse.jpg new file mode 100644 index 000000000..4834d624f Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_cpen_dm_t1/plots/-stat_eval-mse.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/config.yaml b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/config.yaml new file mode 100644 index 000000000..67044a943 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/config.yaml @@ -0,0 +1,246 @@ +algo: ppo +algo_config: + activation: tanh + actor_lr: 0.001 + clip_obs: 10 + clip_param: 0.2 + clip_reward: 10 + critic_lr: 0.001 + deque_size: 10 + entropy_coef: 0.01 + eval_batch_size: 10 + eval_interval: 5000 + eval_save_best: true + filter_train_actions: false + gae_lambda: 0.95 + gamma: 0.99 + hidden_dim: 128 + log_interval: 5000 + max_env_steps: 250000 + max_grad_norm: 0.5 + mini_batch_size: 256 + norm_obs: false + norm_reward: false + num_checkpoints: 0 + num_workers: 1 + opt_epochs: 20 + penalize_sf_diff: false + rollout_batch_size: 1 + rollout_steps: 1000 + save_interval: 0 + sf_penalty: false + target_kl: 0.01 + tensorboard: false + training: true + use_clipped_value: false + use_gae: true + use_safe_reset: false +device: cpu +kv_overrides: +- sf_config.cost_function=one_step_cost +- algo_config.filter_train_actions=False +- algo_config.penalize_sf_diff=False +- algo_config.use_safe_reset=False +- algo_config.sf_penalty=False +- task_config.use_constraint_penalty=False +output_dir: ./models/rl_models/ppo/none_dm +overrides: +- ./config_overrides/crazyflie_track.yaml +- ./config_overrides/ppo_crazyflie.yaml +- ./config_overrides/nl_mpsc.yaml +restore: null +safety_filter: nl_mpsc +seed: null +sf_config: + cost_function: one_step_cost + decay_factor: 0.85 + horizon: 10 + integration_algo: rk4 + mpsc_cost_horizon: 5 + n_samples: 600 + prior_info: + prior_prop: null + prior_prop_rand_info: null + randomize_prior_prop: false + q_lin: + - 0.008 + - 1.85 + - 0.008 + - 1.85 + - 10 + - 10 + r_lin: + - 2 + use_acados: true + use_terminal_set: false + warmstart: true +tag: temp +task: quadrotor +task_config: + adversary_disturbance: null + adversary_disturbance_offset: 0.0 + adversary_disturbance_scale: 0.01 + camera_view: + - 5 + - -40 + - -40 + - 0.5 + - -1 + - 0.5 + constraint_penalty: -1 + constraints: + - active_dims: + - 0 + - 1 + - 2 + - 3 + - 6 + - 7 + constrained_variable: state + constraint_form: bounded_constraint + lower_bounds: + - -0.95 + - -2 + - -0.95 + - -2 + - -0.25 + - -0.25 + upper_bounds: + - 0.95 + - 2 + - 0.95 + - 2 + - 0.25 + - 0.25 + - constrained_variable: input + constraint_form: default_constraint + cost: quadratic + ctrl_freq: 500 + disturbances: + dynamics: + - disturbance_func: white_noise + std: 0.2 + observation: + - disturbance_func: white_noise + std: 0.002 + done_on_out_of_bound: true + done_on_violation: false + episode_len_sec: 15 + gui: false + inertial_prop: + Ixx: 1.4e-05 + Iyy: 1.4e-05 + Izz: 2.17e-05 + M: 0.0345 + inertial_prop_randomization_info: + Ixx: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Iyy: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + Izz: + distrib: uniform + high: 1.0e-06 + low: -1.0e-06 + M: + distrib: uniform + high: 0.0025 + low: -0.0025 + info_in_reset: true + init_state: + init_p: 0 + init_phi: 0 + init_psi: 0 + init_q: 0 + init_r: 0 + init_theta: 0 + init_x: 0 + init_x_dot: 0 + init_y: 0 + init_y_dot: 0 + init_z: 1 + init_z_dot: 0 + init_state_randomization_info: + init_p: + distrib: uniform + high: 0.5 + low: -0.5 + init_phi: + distrib: uniform + high: 0.25 + low: -0.25 + init_psi: + distrib: uniform + high: 0 + low: 0 + init_q: + distrib: uniform + high: 0.5 + low: -0.5 + init_r: + distrib: uniform + high: 0 + low: 0 + init_theta: + distrib: uniform + high: 0.25 + low: -0.25 + init_x: + distrib: uniform + high: 0.95 + low: -0.95 + init_x_dot: + distrib: uniform + high: 2 + low: -2 + init_y: + distrib: uniform + high: 0.95 + low: -0.95 + init_y_dot: + distrib: uniform + high: 2 + low: -2 + init_z: + distrib: uniform + high: 1 + low: 1 + init_z_dot: + distrib: uniform + high: 0 + low: 0 + norm_act_scale: 0.1 + normalized_rl_action_space: false + obs_goal_horizon: 0 + physics: pyb + pyb_freq: 1000 + quad_type: 3 + randomized_inertial_prop: true + randomized_init: false + rew_act_weight: 0.0001 + rew_exponential: true + rew_state_weight: 1.0 + seed: 1337 + task: traj_tracking + task_info: + num_cycles: 1 + proj_normal: + - 0 + - 1 + - 1 + proj_point: + - 0 + - 0 + - 0.5 + trajectory_plane: xz + trajectory_position_offset: + - 0 + - 1 + trajectory_scale: 1 + trajectory_type: figure8 + use_constraint_penalty: false + verbose: false +use_gpu: false diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/loss/approx_kl.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/loss/approx_kl.log new file mode 100644 index 000000000..934f4e0ac --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/loss/approx_kl.log @@ -0,0 +1,51 @@ +step,loss/approx_kl +5000,0.03111585771354535 +10000,0.019215839748115585 +15000,0.019070144591387367 +20000,0.036891327344346796 +25000,0.03420391258162757 +30000,0.042424871764766676 +35000,0.03765717096781978 +40000,0.030128701042849572 +45000,0.01686045506115382 +50000,0.035347777915497626 +55000,0.02741859578139459 +60000,0.014998494379688055 +65000,0.008285012662721176 +70000,0.011234460712876171 +75000,0.02241281192982569 +80000,0.013851157164511583 +85000,0.027086389414034785 +90000,0.023028108081780373 +95000,0.011858385164911547 +100000,0.025700207000287866 +105000,0.011959706894898166 +110000,0.018821984448004515 +115000,0.008375087958605339 +120000,0.03195297028481339 +125000,0.013925671096270284 +130000,0.015891470752346017 +135000,0.017064063933988412 +140000,0.016245162983735403 +145000,0.005940153950359672 +150000,0.0037341874558478594 +155000,0.017253518434396632 +160000,0.024434501631185414 +165000,0.016069660638459028 +170000,0.0351452441381601 +175000,0.014514057330476744 +180000,0.03234816839297612 +185000,0.01988837701501325 +190000,0.01595238349012409 +195000,0.024402525396241496 +200000,0.013608571592097479 +205000,0.012342223311619212 +210000,0.018801829393487422 +215000,0.03045094138166556 +220000,0.032081503882848965 +225000,0.016588885698001828 +230000,0.03236772323337694 +235000,0.021971511743807542 +240000,0.03551197895624985 +245000,0.014868506796968481 +250000,0.017438900350437807 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/loss/entropy_loss.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/loss/entropy_loss.log new file mode 100644 index 000000000..c9c34a3f4 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/loss/entropy_loss.log @@ -0,0 +1,51 @@ +step,loss/entropy_loss +5000,-1.8728261232376098 +10000,-1.9173495988051097 +15000,-1.9007334947586059 +20000,-1.8074538350105285 +25000,-1.8121917804082237 +30000,-1.8099495132764178 +35000,-1.7923817892869311 +40000,-1.8086549242337546 +45000,-1.812590891122818 +50000,-1.8121514320373535 +55000,-1.8316043198108674 +60000,-1.8380544304847717 +65000,-1.842243993282318 +70000,-1.8378231088320411 +75000,-1.7809293250242866 +80000,-1.7060474336147309 +85000,-1.6569529235363007 +90000,-1.5886452158292135 +95000,-1.5859472672144572 +100000,-1.588825539747874 +105000,-1.542691852649053 +110000,-1.4914049307505288 +115000,-1.4426835596561431 +120000,-1.4596077084541321 +125000,-1.4620793879032135 +130000,-1.4312764962514242 +135000,-1.438825402657191 +140000,-1.4225559870402016 +145000,-1.4290083030859628 +150000,-1.3851939757664997 +155000,-1.379422144095103 +160000,-1.3981589774290721 +165000,-1.4202676117420199 +170000,-1.3818019191424051 +175000,-1.36373850107193 +180000,-1.3530198156833648 +185000,-1.370780958731969 +190000,-1.406001381079356 +195000,-1.4549828946590424 +200000,-1.4417802751064301 +205000,-1.4666405578454336 +210000,-1.4242243985335032 +215000,-1.4087341010570527 +220000,-1.3880786021550495 +225000,-1.4148000597953796 +230000,-1.4256861666838327 +235000,-1.3984951456387837 +240000,-1.375607450803121 +245000,-1.3668199539184571 +250000,-1.3894291420777638 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/loss/policy_loss.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/loss/policy_loss.log new file mode 100644 index 000000000..7a80e8e8e --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/loss/policy_loss.log @@ -0,0 +1,51 @@ +step,loss/policy_loss +5000,-0.002084379807723889 +10000,-0.005456682887683342 +15000,-0.011406015348665019 +20000,-0.017956694053132195 +25000,-0.01317180776207406 +30000,0.0002993475950439716 +35000,0.009078891890892813 +40000,-0.0028667671658241193 +45000,-0.0045228065088625 +50000,0.0075454084436709255 +55000,-0.010807862853042709 +60000,-0.004911638983071793 +65000,-0.007351270000296559 +70000,-0.01323132331732094 +75000,-0.007209445428686928 +80000,-0.00334384770906524 +85000,-0.009415327356316613 +90000,-0.006256867010242234 +95000,-0.004018655137536981 +100000,-0.007446260218822161 +105000,-0.005459928901820965 +110000,-0.005698378027969752 +115000,-0.010016416160803458 +120000,0.0035948238904857524 +125000,-0.009605092616725583 +130000,-0.0022717666680076827 +135000,-0.007619176856754228 +140000,-0.004740909439745421 +145000,0.0021780242562697712 +150000,-0.011668166291513203 +155000,-0.003018869862590817 +160000,-0.008050839546151548 +165000,-0.0025921093397960095 +170000,-0.005574956154936967 +175000,-0.010414552969924992 +180000,9.31597521543254e-05 +185000,0.004151330526100362 +190000,-0.005649840762640212 +195000,-0.0024474519986348612 +200000,-0.010436395094653574 +205000,-0.004621344524852863 +210000,-0.0032661527297550507 +215000,-0.0030974208961965033 +220000,-0.01000946649333365 +225000,-0.0037686137634224486 +230000,0.0029820299477656157 +235000,-0.0007473170459760985 +240000,0.007587965492611942 +245000,-0.010689337122756845 +250000,-0.008282696975429774 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/loss/value_loss.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/loss/value_loss.log new file mode 100644 index 000000000..1a7b95dc3 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/loss/value_loss.log @@ -0,0 +1,51 @@ +step,loss/value_loss +5000,3.5613782801182206 +10000,2.589342663010864 +15000,0.6740293106944819 +20000,1.0245248662656639 +25000,1.8527975964252719 +30000,1.6976280757930926 +35000,2.720042996807449 +40000,1.0223185776422656 +45000,0.7889964962026378 +50000,6.361704895001326 +55000,11.043946758496388 +60000,6.200095772691827 +65000,3.600257772577506 +70000,1.1760217002508404 +75000,3.4111478049990103 +80000,5.058016605339295 +85000,7.326645303273532 +90000,7.202561923458568 +95000,12.9345496992959 +100000,8.350701424426088 +105000,8.292379781414141 +110000,7.868319035678384 +115000,1.8959297473723535 +120000,0.5188762668898732 +125000,3.15108054056095 +130000,2.915733516427594 +135000,1.3634119034988337 +140000,3.991770612591437 +145000,1.7206168102100474 +150000,1.9318040045702989 +155000,0.5999060034001161 +160000,2.8128187813723002 +165000,1.1957943665355046 +170000,0.9770943467857325 +175000,0.9305066401513555 +180000,1.53902720202193 +185000,1.0911323577096312 +190000,2.0371736659576696 +195000,1.5170588916939969 +200000,1.5006984982714546 +205000,28.33609919253354 +210000,1.4890403570944046 +215000,0.6612628650907064 +220000,0.46040076355706105 +225000,0.5286180818977717 +230000,1.2707223800489336 +235000,4.03396635379444 +240000,4.132647984371447 +245000,4.586296987213354 +250000,0.6605474643184872 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat/ep_constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat/ep_constraint_violation.log new file mode 100644 index 000000000..b305b71f0 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat/ep_constraint_violation.log @@ -0,0 +1,51 @@ +step,stat/ep_constraint_violation +5000,31.25 +10000,25.88235294117647 +15000,33.166666666666664 +20000,27.357142857142858 +25000,41.61538461538461 +30000,83.875 +35000,125.42857142857143 +40000,203.4 +45000,145.0 +50000,271.3333333333333 +55000,162.66666666666666 +60000,191.5 +65000,164.5 +70000,123.0 +75000,169.5 +80000,118.5 +85000,147.0 +90000,172.0 +95000,199.0 +100000,112.5 +105000,177.5 +110000,201.0 +115000,187.0 +120000,172.5 +125000,154.0 +130000,129.0 +135000,175.5 +140000,205.5 +145000,136.0 +150000,126.5 +155000,89.0 +160000,193.5 +165000,218.5 +170000,144.5 +175000,178.5 +180000,216.5 +185000,166.5 +190000,133.5 +195000,154.5 +200000,110.5 +205000,285.5 +210000,217.5 +215000,199.5 +220000,104.5 +225000,129.5 +230000,166.0 +235000,214.0 +240000,280.5 +245000,207.0 +250000,231.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat/ep_length.log new file mode 100644 index 000000000..6926b0f4e --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat/ep_length.log @@ -0,0 +1,51 @@ +step,stat/ep_length +5000,79.33333333333333 +10000,58.05882352941177 +15000,79.75 +20000,69.85714285714286 +25000,76.0 +30000,120.5 +35000,137.71428571428572 +40000,173.2 +45000,188.4 +50000,316.3333333333333 +55000,291.3333333333333 +60000,375.0 +65000,375.0 +70000,375.0 +75000,375.0 +80000,375.0 +85000,375.0 +90000,375.0 +95000,375.0 +100000,375.0 +105000,375.0 +110000,375.0 +115000,375.0 +120000,375.0 +125000,375.0 +130000,375.0 +135000,375.0 +140000,375.0 +145000,375.0 +150000,375.0 +155000,375.0 +160000,375.0 +165000,375.0 +170000,375.0 +175000,375.0 +180000,375.0 +185000,375.0 +190000,375.0 +195000,375.0 +200000,375.0 +205000,375.0 +210000,375.0 +215000,375.0 +220000,375.0 +225000,375.0 +230000,375.0 +235000,375.0 +240000,375.0 +245000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat/ep_return.log new file mode 100644 index 000000000..5f521584a --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat/ep_return.log @@ -0,0 +1,51 @@ +step,stat/ep_return +5000,28.907749219780115 +10000,36.561346049331746 +15000,44.84131111799572 +20000,51.811898480433626 +25000,53.55233816686736 +30000,63.67126969392923 +35000,71.5200432408185 +40000,76.58823803754001 +45000,85.28194872529556 +50000,119.29420750339091 +55000,146.020331558664 +60000,199.91948673772617 +65000,213.0288184878138 +70000,233.83508690865432 +75000,247.51518469953848 +80000,262.09837877049586 +85000,296.36622912776943 +90000,308.65033515078073 +95000,333.0941442081554 +100000,321.54892725953283 +105000,331.93167547922917 +110000,327.266380250303 +115000,340.6748417813505 +120000,347.7777198772626 +125000,350.9562376741938 +130000,339.8086061675796 +135000,345.89272304013286 +140000,346.31522545151614 +145000,340.55884836117355 +150000,347.7667725357054 +155000,355.29345323027303 +160000,343.98597372338736 +165000,347.8863394343425 +170000,354.89386182425727 +175000,348.81897915254433 +180000,350.74860680018287 +185000,353.0020054757043 +190000,353.0612683828755 +195000,345.2733478616972 +200000,353.7653649838486 +205000,302.1734107016728 +210000,358.1918660970259 +215000,353.7213076462245 +220000,353.94223386129136 +225000,354.15565466079113 +230000,347.27842113354745 +235000,350.0533496770987 +240000,346.10218129325006 +245000,353.04813337432034 +250000,352.9196436840904 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat/ep_reward.log new file mode 100644 index 000000000..4478199fa --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat/ep_reward.log @@ -0,0 +1,51 @@ +step,stat/ep_reward +5000,0.3929553972820514 +10000,0.6484317065150944 +15000,0.5692890237634398 +20000,0.7440988435402323 +25000,0.708041053316584 +30000,0.5308795491079596 +35000,0.5236731584683643 +40000,0.45165821398323996 +45000,0.45281718004660504 +50000,0.3923698421702597 +55000,0.5621557727644683 +60000,0.5331186313006031 +65000,0.5680768493008368 +70000,0.6235602317564115 +75000,0.6600404925321026 +80000,0.6989290100546556 +85000,0.7903099443407184 +90000,0.8230675604020818 +95000,0.8882510512217476 +100000,0.8574638060254209 +105000,0.8851511346112779 +110000,0.8727103473341413 +115000,0.908466244750268 +120000,0.9274072530060338 +125000,0.9358833004645168 +130000,0.9061562831135457 +135000,0.9223805947736876 +140000,0.9235072678707097 +145000,0.9081569289631295 +150000,0.9273780600952144 +155000,0.9474492086140613 +160000,0.9172959299290331 +165000,0.9276969051582468 +170000,0.9463836315313527 +175000,0.9301839444067848 +180000,0.9353296181338209 +185000,0.9413386812685448 +190000,0.9414967156876679 +195000,0.9207289276311925 +200000,0.9433743066235962 +205000,0.8057957618711276 +210000,0.955178309592069 +215000,0.943256820389932 +220000,0.9438459569634436 +225000,0.9444150790954431 +230000,0.92607578968946 +235000,0.9334755991389299 +240000,0.9229391501153335 +245000,0.9414616889981876 +250000,0.941119049824241 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat_eval/constraint_violation.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat_eval/constraint_violation.log new file mode 100644 index 000000000..6ed87be3d --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat_eval/constraint_violation.log @@ -0,0 +1,51 @@ +step,stat_eval/constraint_violation +5000,180.0 +10000,142.3 +15000,117.4 +20000,286.3 +25000,232.8 +30000,210.8 +35000,179.4 +40000,196.0 +45000,240.3 +50000,253.9 +55000,224.8 +60000,218.0 +65000,170.2 +70000,10.4 +75000,61.0 +80000,71.0 +85000,198.7 +90000,116.5 +95000,35.2 +100000,92.5 +105000,167.4 +110000,106.2 +115000,85.4 +120000,138.4 +125000,138.8 +130000,103.9 +135000,144.4 +140000,111.2 +145000,181.5 +150000,130.2 +155000,204.8 +160000,155.8 +165000,142.2 +170000,133.3 +175000,83.0 +180000,89.5 +185000,118.8 +190000,120.4 +195000,131.4 +200000,110.7 +205000,73.7 +210000,139.7 +215000,168.9 +220000,195.0 +225000,173.0 +230000,155.5 +235000,116.7 +240000,179.7 +245000,154.4 +250000,180.3 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat_eval/ep_length.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat_eval/ep_length.log new file mode 100644 index 000000000..d807fe105 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat_eval/ep_length.log @@ -0,0 +1,51 @@ +step,stat_eval/ep_length +5000,375.0 +10000,375.0 +15000,375.0 +20000,375.0 +25000,375.0 +30000,375.0 +35000,375.0 +40000,375.0 +45000,375.0 +50000,375.0 +55000,375.0 +60000,375.0 +65000,375.0 +70000,375.0 +75000,375.0 +80000,375.0 +85000,375.0 +90000,375.0 +95000,375.0 +100000,375.0 +105000,375.0 +110000,375.0 +115000,375.0 +120000,375.0 +125000,375.0 +130000,375.0 +135000,375.0 +140000,375.0 +145000,375.0 +150000,375.0 +155000,375.0 +160000,375.0 +165000,375.0 +170000,375.0 +175000,375.0 +180000,375.0 +185000,375.0 +190000,375.0 +195000,375.0 +200000,375.0 +205000,375.0 +210000,375.0 +215000,375.0 +220000,375.0 +225000,375.0 +230000,375.0 +235000,375.0 +240000,375.0 +245000,375.0 +250000,375.0 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat_eval/ep_return.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat_eval/ep_return.log new file mode 100644 index 000000000..ecf85f8ec --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat_eval/ep_return.log @@ -0,0 +1,51 @@ +step,stat_eval/ep_return +5000,82.6248652299208 +10000,81.50937258947532 +15000,104.6088273428367 +20000,116.57794709921589 +25000,135.06048554237415 +30000,145.66334150462802 +35000,134.71434383716092 +40000,147.39956986825828 +45000,155.86953117001434 +50000,156.8534360821006 +55000,171.06959580103484 +60000,191.75891409751972 +65000,187.4052490381683 +70000,222.34737015646024 +75000,240.1093259145817 +80000,256.65840390566143 +85000,230.8022878190978 +90000,287.5314122346557 +95000,322.5216767274761 +100000,293.06942189201686 +105000,275.12954192662534 +110000,291.67241454943223 +115000,314.98068964213587 +120000,290.2026006913502 +125000,290.35476335435544 +130000,300.5760018553341 +135000,295.5201669287727 +140000,309.6073348641846 +145000,276.64925605122835 +150000,296.76606125168456 +155000,261.5931842985177 +160000,293.7296987313115 +165000,280.98840205538914 +170000,289.4900954457078 +175000,330.57670558791926 +180000,324.27005805740464 +185000,320.1332494971367 +190000,315.8451993595456 +195000,298.3369226691252 +200000,313.83853274608356 +205000,330.57949659847816 +210000,295.35981989152407 +215000,294.4977279761771 +220000,262.5433805956169 +225000,277.77033841899043 +230000,303.3181039241405 +235000,332.7306245552409 +240000,290.4216040534503 +245000,304.11444497799283 +250000,284.9536251486147 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat_eval/ep_reward.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat_eval/ep_reward.log new file mode 100644 index 000000000..5a78902f6 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat_eval/ep_reward.log @@ -0,0 +1,51 @@ +step,stat_eval/ep_reward +5000,0.2203329739464555 +10000,0.21735832690526752 +15000,0.2789568729142312 +20000,0.310874525597909 +25000,0.3601612947796644 +30000,0.3884355773456748 +35000,0.3592382502324291 +40000,0.39306551964868874 +45000,0.4156520831200384 +50000,0.41827582955226816 +55000,0.4561855888027595 +60000,0.5113571042600527 +65000,0.4997473307684488 +70000,0.5929263204172274 +75000,0.6402915357722179 +80000,0.6844224104150971 +85000,0.615472767517594 +90000,0.7667504326257484 +95000,0.8600578046066032 +100000,0.7815184583787117 +105000,0.7336787784710008 +110000,0.7777931054651526 +115000,0.8399485057123623 +120000,0.7738736018436005 +125000,0.7742793689449479 +130000,0.8015360049475577 +135000,0.7880537784767272 +140000,0.8256195596378257 +145000,0.7377313494699422 +150000,0.7913761633378255 +155000,0.6975818247960474 +160000,0.7832791966168307 +165000,0.7493024054810378 +170000,0.7719735878552209 +175000,0.881537881567785 +180000,0.8647201548197458 +185000,0.853688665325698 +190000,0.8422538649587882 +195000,0.7955651271176672 +200000,0.8369027539895562 +205000,0.8815453242626086 +210000,0.7876261863773975 +215000,0.785327274603139 +220000,0.7001156815883119 +225000,0.7407209024506409 +230000,0.8088482771310413 +235000,0.8872816654806426 +240000,0.7744576108092007 +245000,0.8109718532746475 +250000,0.7598763337296393 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat_eval/mse.log b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat_eval/mse.log new file mode 100644 index 000000000..5c847ebf1 --- /dev/null +++ b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/logs/stat_eval/mse.log @@ -0,0 +1,51 @@ +step,stat_eval/mse +5000,3.566401391550598 +10000,4.407049045987671 +15000,3.4000933905735264 +20000,3.9208267428358523 +25000,3.4408758184311488 +30000,2.76102525473667 +35000,3.0294046613634653 +40000,2.8812549389460895 +45000,2.802979498002708 +50000,2.957970672526278 +55000,2.420860085918058 +60000,2.2384483233512933 +65000,2.2763736696101153 +70000,0.9888395330391881 +75000,0.9056027645992442 +80000,0.6902333180715963 +85000,2.123008973647601 +90000,1.0393842211197346 +95000,0.28181172947096444 +100000,0.678609763129294 +105000,1.2863627258831536 +110000,0.7755020422803824 +115000,0.48072476145904675 +120000,0.9573849408037172 +125000,1.1817100868972612 +130000,0.7917206095287886 +135000,1.0230666761296598 +140000,0.5793766115181127 +145000,1.4294990153694367 +150000,1.3157858771369901 +155000,1.9042270576394718 +160000,0.9115451922116883 +165000,1.153451973249059 +170000,1.1347277441873374 +175000,0.3027352768596582 +180000,0.4323086133397765 +185000,0.42839511258298507 +190000,0.4471784198881982 +195000,0.7976256018915204 +200000,1.047175094597712 +205000,0.3084885220357293 +210000,0.7657971538560507 +215000,1.3553336945885743 +220000,1.3916594126378639 +225000,1.39346801236807 +230000,0.988228713776216 +235000,0.34614873051017814 +240000,0.9983409273692851 +245000,0.6167215835075731 +250000,1.1017689236858215 diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/model_best.pt b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/model_best.pt new file mode 100644 index 000000000..04d8a0544 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/model_best.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/model_latest.pt b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/model_latest.pt new file mode 100644 index 000000000..b2588b04a Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/model_latest.pt differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-loss-approx_kl.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-loss-approx_kl.jpg new file mode 100644 index 000000000..9e368b37f Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-loss-approx_kl.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-loss-entropy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-loss-entropy_loss.jpg new file mode 100644 index 000000000..cfdf3c14f Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-loss-policy_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-loss-policy_loss.jpg new file mode 100644 index 000000000..08ef2007b Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-loss-policy_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-loss-value_loss.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-loss-value_loss.jpg new file mode 100644 index 000000000..15b7bde22 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-loss-value_loss.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat-ep_constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat-ep_constraint_violation.jpg new file mode 100644 index 000000000..8c813f72d Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat-ep_length.jpg new file mode 100644 index 000000000..237617cc7 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat-ep_return.jpg new file mode 100644 index 000000000..1cc7ff096 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat-ep_reward.jpg new file mode 100644 index 000000000..c50c98c22 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat_eval-constraint_violation.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat_eval-constraint_violation.jpg new file mode 100644 index 000000000..e3e09744b Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat_eval-ep_length.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat_eval-ep_length.jpg new file mode 100644 index 000000000..86bfcaf78 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat_eval-ep_return.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat_eval-ep_return.jpg new file mode 100644 index 000000000..a499d229d Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat_eval-ep_reward.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat_eval-ep_reward.jpg new file mode 100644 index 000000000..24afc4639 Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat_eval-mse.jpg b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat_eval-mse.jpg new file mode 100644 index 000000000..a3160b7dc Binary files /dev/null and b/experiments/crazyflie/models/rl_models/ppo/none_dm_t1/plots/-stat_eval-mse.jpg differ diff --git a/experiments/crazyflie/plotting_results.py b/experiments/crazyflie/plotting_results.py index 3bb3406c7..2c6d2f194 100644 --- a/experiments/crazyflie/plotting_results.py +++ b/experiments/crazyflie/plotting_results.py @@ -20,7 +20,7 @@ plot = False save_figs = True -suffix = '_dm_s5' +suffix = '_dm_t1' # suffix = '' ordered_models = [f'mpsf_0.1{suffix}', f'mpsf_1{suffix}', f'mpsf_10{suffix}', f'none{suffix}', f'none_cpen{suffix}'] @@ -73,6 +73,7 @@ def load_all_models(algo): reward = np.sum(np.exp(-dist)) model_data['rewards'].append(reward) + # TODO fix this constr_viols = np.sum(np.sum(np.abs(model_data['states'][-1][:, [0,1,2,3,6,7]]) > np.array([[0.95, 2, 0.95, 2, 0.25, 0.25]]), axis=1) > 0) model_data['constraint_violations'].append(constr_viols) @@ -297,7 +298,7 @@ def create_paper_plot(data_extractor): fig.savefig(f'./results_cf/{algo_name}/graphs/real/{image_suffix}.png', dpi=300) else: if suffix != '': - fig.savefig(f'./results_cf/{algo_name}/graphs/dm/{image_suffix}.png', dpi=300) + fig.savefig(f'./results_cf/{algo_name}/graphs/{suffix[1:]}/{image_suffix}.png', dpi=300) else: fig.savefig(f'./results_cf/{algo_name}/graphs/{image_suffix}.png', dpi=300) # tikzplotlib.save(f'./{image_suffix}.tex', axis_height='2.2in', axis_width='3.5in') @@ -362,7 +363,7 @@ def plot_log(algo, key, all_results): if save_figs: image_suffix = key.replace('/', '__') if suffix != '': - fig.savefig(f'./results_cf/{algo}/graphs/dm/{image_suffix}.png', dpi=300) + fig.savefig(f'./results_cf/{algo}/graphs/{suffix[1:]}/{image_suffix}.png', dpi=300) else: fig.savefig(f'./results_cf/{algo}/graphs/{image_suffix}.png', dpi=300) plt.close() diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/constraint_violations.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/constraint_violations.png new file mode 100644 index 000000000..573d84d1c Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/constraint_violations.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/feasible_iterations.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/feasible_iterations.png new file mode 100644 index 000000000..37e1411e0 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/feasible_iterations.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/length.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/length.png new file mode 100644 index 000000000..1070ecd78 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/length.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/loss__approx_kl.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/loss__approx_kl.png new file mode 100644 index 000000000..b8e383a08 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/loss__approx_kl.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/loss__entropy_loss.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/loss__entropy_loss.png new file mode 100644 index 000000000..04e55995c Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/loss__entropy_loss.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/loss__policy_loss.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/loss__policy_loss.png new file mode 100644 index 000000000..123b480dd Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/loss__policy_loss.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/loss__value_loss.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/loss__value_loss.png new file mode 100644 index 000000000..4096497af Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/loss__value_loss.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/magnitude_of_corrections.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/magnitude_of_corrections.png new file mode 100644 index 000000000..e34d622d5 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/magnitude_of_corrections.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/max_correction.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/max_correction.png new file mode 100644 index 000000000..79b4d0228 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/max_correction.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/number_of_corrections.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/number_of_corrections.png new file mode 100644 index 000000000..e1c9fbd7a Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/number_of_corrections.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/percent_magnitude_of_corrections.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/percent_magnitude_of_corrections.png new file mode 100644 index 000000000..0a9c7fd7a Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/percent_magnitude_of_corrections.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/percent_max_correction.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/percent_max_correction.png new file mode 100644 index 000000000..7e99ce6fb Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/percent_max_correction.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/rate_of_change.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/rate_of_change.png new file mode 100644 index 000000000..0b1e63bc4 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/rate_of_change.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/reward.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/reward.png new file mode 100644 index 000000000..b0149b668 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/reward.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/rmse.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/rmse.png new file mode 100644 index 000000000..7cffaeaa2 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/rmse.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat__ep_constraint_violation.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat__ep_constraint_violation.png new file mode 100644 index 000000000..90fdfe16b Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat__ep_constraint_violation.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat__ep_length.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat__ep_length.png new file mode 100644 index 000000000..4ba1d5d5c Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat__ep_length.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat__ep_return.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat__ep_return.png new file mode 100644 index 000000000..397cadec6 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat__ep_return.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat__ep_reward.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat__ep_reward.png new file mode 100644 index 000000000..7dec4ee79 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat__ep_reward.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat_eval__constraint_violation.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat_eval__constraint_violation.png new file mode 100644 index 000000000..feb198dd7 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat_eval__constraint_violation.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat_eval__ep_length.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat_eval__ep_length.png new file mode 100644 index 000000000..69b2c3e5e Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat_eval__ep_length.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat_eval__ep_return.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat_eval__ep_return.png new file mode 100644 index 000000000..d4ae1468d Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat_eval__ep_return.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat_eval__ep_reward.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat_eval__ep_reward.png new file mode 100644 index 000000000..30abc26c2 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat_eval__ep_reward.png differ diff --git a/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat_eval__mse.png b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat_eval__mse.png new file mode 100644 index 000000000..29c22348f Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/graphs/dm_t1/stat_eval__mse.png differ diff --git a/experiments/crazyflie/results_cf/ppo/mpsf_0.1_dm_t1.pkl b/experiments/crazyflie/results_cf/ppo/mpsf_0.1_dm_t1.pkl new file mode 100644 index 000000000..843006a99 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/mpsf_0.1_dm_t1.pkl differ diff --git a/experiments/crazyflie/results_cf/ppo/mpsf_10_dm_t1.pkl b/experiments/crazyflie/results_cf/ppo/mpsf_10_dm_t1.pkl new file mode 100644 index 000000000..e5e25a68a Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/mpsf_10_dm_t1.pkl differ diff --git a/experiments/crazyflie/results_cf/ppo/mpsf_1_dm_t1.pkl b/experiments/crazyflie/results_cf/ppo/mpsf_1_dm_t1.pkl new file mode 100644 index 000000000..6861a2d35 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/mpsf_1_dm_t1.pkl differ diff --git a/experiments/crazyflie/results_cf/ppo/none_cpen_dm_t1.pkl b/experiments/crazyflie/results_cf/ppo/none_cpen_dm_t1.pkl new file mode 100644 index 000000000..e0073fe8f Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/none_cpen_dm_t1.pkl differ diff --git a/experiments/crazyflie/results_cf/ppo/none_dm_t1.pkl b/experiments/crazyflie/results_cf/ppo/none_dm_t1.pkl new file mode 100644 index 000000000..258333990 Binary files /dev/null and b/experiments/crazyflie/results_cf/ppo/none_dm_t1.pkl differ diff --git a/experiments/crazyflie/test_crazyflie.sh b/experiments/crazyflie/test_crazyflie.sh index 54d1aea6a..dcf1d96f5 100755 --- a/experiments/crazyflie/test_crazyflie.sh +++ b/experiments/crazyflie/test_crazyflie.sh @@ -39,25 +39,25 @@ else SF_PEN_TAG="_$4" fi -TAG="$1${CONSTR_PEN_TAG}${SF_PEN_TAG}_dm_s5" +TAG="$1${CONSTR_PEN_TAG}${SF_PEN_TAG}_dm_t1" echo $TAG $SYS $ALGO $TASK -# python3 ./train_rl.py \ -# --task quadrotor \ -# --algo ${ALGO} \ -# --safety_filter ${MPSC} \ -# --overrides \ -# ./config_overrides/crazyflie_${TASK}.yaml \ -# ./config_overrides/${ALGO}_crazyflie.yaml \ -# ./config_overrides/nl_mpsc.yaml \ -# --output_dir ./models/rl_models/${ALGO}/${TAG} \ -# --kv_overrides \ -# sf_config.cost_function=one_step_cost \ -# algo_config.filter_train_actions=$FILTER \ -# algo_config.penalize_sf_diff=$FILTER \ -# algo_config.use_safe_reset=$FILTER \ -# algo_config.sf_penalty=$4 \ -# task_config.use_constraint_penalty=$3 +python3 ./train_rl.py \ + --task quadrotor \ + --algo ${ALGO} \ + --safety_filter ${MPSC} \ + --overrides \ + ./config_overrides/crazyflie_${TASK}.yaml \ + ./config_overrides/${ALGO}_crazyflie.yaml \ + ./config_overrides/nl_mpsc.yaml \ + --output_dir ./models/rl_models/${ALGO}/${TAG} \ + --kv_overrides \ + sf_config.cost_function=one_step_cost \ + algo_config.filter_train_actions=$FILTER \ + algo_config.penalize_sf_diff=$FILTER \ + algo_config.use_safe_reset=$FILTER \ + algo_config.sf_penalty=$4 \ + task_config.use_constraint_penalty=$3 python3 ./crazyflie_experiment.py \ --task quadrotor \ diff --git a/experiments/crazyflie/train_all_models.sh b/experiments/crazyflie/train_all_models.sh index 847a3b1dd..9ffefb2d8 100755 --- a/experiments/crazyflie/train_all_models.sh +++ b/experiments/crazyflie/train_all_models.sh @@ -3,6 +3,6 @@ for ALGO in ppo; do ./test_crazyflie.sh mpsf $ALGO False 0.1 #mpsf_sr_pen_0.1 ./test_crazyflie.sh mpsf $ALGO False 1 #mpsf_sr_pen_1 ./test_crazyflie.sh mpsf $ALGO False 10 #mpsf_sr_pen_10 - ./test_crazyflie.sh none $ALGO False False #none + # ./test_crazyflie.sh none $ALGO False False #none ./test_crazyflie.sh none $ALGO True False #none_cpen done