diff --git a/.gitignore b/.gitignore index 49e2bcb3f..31805cfd4 100644 --- a/.gitignore +++ b/.gitignore @@ -6,11 +6,13 @@ examples/mpsc/unsafe_rl_temp_data/ # examples/pid/*data/ # -Results/ +experiments/mpsc/results* +experiments/mpsc/models/rl_models* +# results/ z_docstring.py TODOs.md -# +# hpo_study*/ hp_study*/ comparisons/ @@ -155,10 +157,4 @@ dmypy.json .idea/ *c_generated_code/ -acados_ocp_nlp.json -gpmpc_acados_ocp_solver.json -gpmpc_update/ -temp -models/ -benchmarking_sim/quadrotor/acados_ocp.json -acados_ocp.json \ No newline at end of file +*acados_ocp*.json diff --git a/benchmarking_sim/quadrotor/config_overrides/mpc_acados_quadrotor_2D_attitude_tracking_100.yaml b/benchmarking_sim/quadrotor/config_overrides/mpc_acados_quadrotor_2D_attitude_tracking_100.yaml index 4c51188d5..eca0f59be 100644 --- a/benchmarking_sim/quadrotor/config_overrides/mpc_acados_quadrotor_2D_attitude_tracking_100.yaml +++ b/benchmarking_sim/quadrotor/config_overrides/mpc_acados_quadrotor_2D_attitude_tracking_100.yaml @@ -1,34 +1,10 @@ algo: mpc_acados algo_config: - # horizon: 40 - # r_mpc: - # - 0.8 - # - 0.8 - # q_mpc: - # - 5.0 - # - 0.1 - # - 5.0 - # - 0.1 - # - 0.5 - # - 0.001 - # horizon: 25 - q_mpc: [18, 0.1, 18, 0.1, 0.5, 0.01] - r_mpc: [15., 5.] - # r_mpc: [3., 3.] - # horizon: 50 - # q_mpc: - # - 7.920690650196039 - # - 0.0001 - # - 2.284725973739269 - # - 0.00010069955204926803 - # - 0.017010527262423716 - # - 0.0001 - # r_mpc: - # - 0.0001 - # - 0.010206543973281433 + horizon: 25 + q_mpc: [10, 0.1, 10, 0.1, 0.1, 0.001] + r_mpc: [0.1, 0.1] prior_info: - # prior_prop: null - prior_prop: + prior_prop: M: 0.033 beta_1: 18.11 beta_2: 3.68 @@ -40,9 +16,7 @@ algo_config: randomize_prior_prop: False prior_prop_rand_info: null warmstart: True - # use_lqr_gain_and_terminal_cost: True - # soft_constraints: True - # use_RTI: True output_dir: ./mpc_acados/results - + soft_constraints: True + soft_penalty: 1000.0 diff --git a/benchmarking_sim/quadrotor/config_overrides/quadrotor_2D_attitude_tracking.yaml b/benchmarking_sim/quadrotor/config_overrides/quadrotor_2D_attitude_tracking.yaml index 465e43ea3..f642f1586 100644 --- a/benchmarking_sim/quadrotor/config_overrides/quadrotor_2D_attitude_tracking.yaml +++ b/benchmarking_sim/quadrotor/config_overrides/quadrotor_2D_attitude_tracking.yaml @@ -3,7 +3,6 @@ task_config: ctrl_freq: 60 pyb_freq: 60 physics: dyn_si - # physics: pyb quad_type: 4 init_state: @@ -13,35 +12,9 @@ task_config: init_z_dot: 0 init_theta: 0 init_theta_dot: 0 - randomized_init: True + randomized_init: False randomized_inertial_prop: False - init_state_randomization_info: - init_x: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_x_dot: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_z: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_z_dot: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_theta: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_theta_dot: - distrib: 'uniform' - low: -0.05 - high: 0.05 - task: traj_tracking task_info: trajectory_type: figure8 @@ -49,8 +22,6 @@ task_config: trajectory_plane: 'xz' trajectory_position_offset: [0, 1.] 
trajectory_scale: 1.0 - # ilqr_ref: True - # ilqr_traj_data: /home/mingxuan/Repositories/scg_tsung/examples/lqr/ilqr_ref_traj.npy inertial_prop: M: 0.033 @@ -64,24 +35,21 @@ task_config: pitch_bias: 0.0 # in radian episode_len_sec: 11 - cost: quadratic + cost: rl_reward + obs_goal_horizon: 0 + + # RL Reward + rew_state_weight: [10, 0.1, 10, 0.1, 0.1, 0.001] + rew_act_weight: [0.1, 0.1] + rew_exponential: True constraints: - constraint_form: default_constraint constrained_variable: state + upper_bounds: [ 0.9, 2, 1.45, 2, 0.75, 3] + lower_bounds: [-0.9, -2, 0.55, -2, -0.75, -3] - constraint_form: default_constraint constrained_variable: input - # upper_bounds: [0.58212, 0.7] - # lower_bounds: [0.09702, -0.7] done_on_out_of_bound: True done_on_violation: False - disturbances: - # dynamics: # disturbance force in newton - # - disturbance_func: uniform - # low: 1. - # high: 1. - # mask: [1, 0, 0, 0] - observation: - - disturbance_func: white_noise - std: [5.6e-05, 1.5e-02, 2.9e-05, 8.0e-03, 1.3e-03, 3.5e-03] \ No newline at end of file diff --git a/benchmarking_sim/quadrotor/mb_experiment.py b/benchmarking_sim/quadrotor/mb_experiment.py index c4db39418..66fab3c0c 100644 --- a/benchmarking_sim/quadrotor/mb_experiment.py +++ b/benchmarking_sim/quadrotor/mb_experiment.py @@ -1,7 +1,7 @@ import os -import sys import pickle +import sys from collections import defaultdict from functools import partial @@ -10,17 +10,17 @@ from matplotlib.ticker import FormatStrFormatter from safe_control_gym.envs.benchmark_env import Task +from safe_control_gym.envs.gym_pybullet_drones.quadrotor import Quadrotor +from safe_control_gym.envs.gym_pybullet_drones.quadrotor_utils import QuadType from safe_control_gym.experiments.base_experiment import BaseExperiment -from safe_control_gym.experiments.epoch_experiments import EpochExperiment from safe_control_gym.utils.configuration import ConfigFactory +from safe_control_gym.utils.gpmpc_plotting import make_quad_plots from safe_control_gym.utils.registration import make from safe_control_gym.utils.utils import mkdirs, set_dir_from_config, timing -from safe_control_gym.envs.gym_pybullet_drones.quadrotor import Quadrotor -from safe_control_gym.envs.gym_pybullet_drones.quadrotor_utils import QuadType -from safe_control_gym.utils.gpmpc_plotting import make_quad_plots script_path = os.path.dirname(os.path.realpath(__file__)) + @timing def run(gui=False, n_episodes=1, n_steps=None, save_data=True): '''The main function running experiments for model-based methods. 
@@ -66,37 +66,37 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True): sys.argv[1:] = ['--algo', ALGO, '--task', agent, '--overrides', - f'./config_overrides/{SYS}_{TASK}{ADDITIONAL}.yaml', - f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml', + f'./config_overrides/{SYS}_{TASK}{ADDITIONAL}.yaml', + f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml', '--seed', '1', '--use_gpu', 'True', '--output_dir', f'./{ALGO}/results', - ] + ] else: - MPSC_COST='one_step_cost' + MPSC_COST = 'one_step_cost' assert ALGO != 'gp_mpc', 'Safety filter not supported for gp_mpc' assert os.path.exists(f'./config_overrides/{SAFETY_FILTER}_{SYS}_{TASK}_{PRIOR}.yaml'), f'./config_overrides/{SAFETY_FILTER}_{SYS}_{TASK}_{PRIOR}.yaml does not exist' sys.argv[1:] = ['--algo', ALGO, '--task', agent, '--safety_filter', SAFETY_FILTER, '--overrides', - f'./config_overrides/{SYS}_{TASK}{ADDITIONAL}.yaml', - f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml', - f'./config_overrides/{SAFETY_FILTER}_{SYS}_{TASK}_{PRIOR}.yaml', + f'./config_overrides/{SYS}_{TASK}{ADDITIONAL}.yaml', + f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml', + f'./config_overrides/{SAFETY_FILTER}_{SYS}_{TASK}_{PRIOR}.yaml', '--kv_overrides', f'sf_config.cost_function={MPSC_COST}', '--seed', '2', '--use_gpu', 'True', '--output_dir', f'./{ALGO}/results', - ] + ] fac = ConfigFactory() fac.add_argument('--func', type=str, default='train', help='main function to run.') fac.add_argument('--n_episodes', type=int, default=1, help='number of episodes to run.') # merge config and create output directory config = fac.merge() - if ALGO in ['gpmpc_acados', 'gp_mpc' , 'gpmpc_acados_TP']: + if ALGO in ['gpmpc_acados', 'gp_mpc', 'gpmpc_acados_TP']: num_data_max = config.algo_config.num_epochs * config.algo_config.num_samples config.output_dir = os.path.join(config.output_dir, PRIOR + '_' + repr(num_data_max)) - print('output_dir', config.algo_config.output_dir) + print('output_dir', config.algo_config.output_dir) set_dir_from_config(config) config.algo_config.output_dir = config.output_dir mkdirs(config.output_dir) @@ -110,22 +110,36 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True): random_env = env_func(gui=False) # Create controller. 
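+    # Note: the controller below is constructed with slightly tightened state and input
+    # constraint bounds; the nominal bounds are restored immediately after it is created.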
+ config.task_config.constraints[0].upper_bounds = [0.899, 1.99, 1.449, 1.99, 0.749, 2.99] + config.task_config.constraints[0].lower_bounds = [-0.899, -1.99, 0.551, -1.99, -0.749, -2.99] + config.task_config.constraints[1].upper_bounds = [0.59, 0.436] + config.task_config.constraints[1].lower_bounds = [0.113, -0.436] + + ctrl_env_func = partial(make, + config.task, + seed=config.seed, + **config.task_config + ) ctrl = make(config.algo, - env_func, + ctrl_env_func, seed=config.seed, **config.algo_config ) - + config.task_config.constraints[0].upper_bounds = [0.9, 2, 1.45, 2, 0.75, 3] + config.task_config.constraints[0].lower_bounds = [-0.9, -2, 0.55, -2, -0.75, -3] + config.task_config.constraints[1].upper_bounds = [0.59336579, 0.43633232] + config.task_config.constraints[1].lower_bounds = [0.11264675, -0.43633232] + # Setup safety filter if SAFETY_FILTER is not None: env_func_filter = partial(make, - config.task, - seed=config.seed, - **config.task_config) + config.task, + seed=config.seed, + **config.task_config) safety_filter = make(config.safety_filter, - env_func_filter, - seed=config.seed, - **config.sf_config) + env_func_filter, + seed=config.seed, + **config.sf_config) safety_filter.reset() all_trajs = defaultdict(list) @@ -140,19 +154,19 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True): static_train_env = env_func(gui=False, randomized_init=False, init_state=init_state) # Create experiment, train, and run evaluation - if SAFETY_FILTER is None: - if ALGO in ['gpmpc_acados', 'gp_mpc' , 'gpmpc_acados_TP']: + if SAFETY_FILTER is None: + if ALGO in ['gpmpc_acados', 'gp_mpc', 'gpmpc_acados_TP']: experiment = BaseExperiment(env=static_env, ctrl=ctrl, train_env=static_train_env) if config.algo_config.num_epochs == 1: print('Evaluating prior controller') elif config.algo_config.gp_model_path is not None: ctrl.load(config.algo_config.gp_model_path) else: - # manually launch training + # manually launch training # (NOTE: not using launch_training method since calling plotting before eval will break the eval) experiment.reset() train_runs, test_runs = ctrl.learn(env=static_train_env) - else: + else: experiment = BaseExperiment(env=static_env, ctrl=ctrl, train_env=static_train_env) experiment.launch_training() else: @@ -169,21 +183,20 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True): # plotting training and evaluation results # training - if ALGO in ['gpmpc_acados', 'gp_mpc' , 'gpmpc_acados_TP'] and \ + if ALGO in ['gpmpc_acados', 'gp_mpc', 'gpmpc_acados_TP'] and \ config.algo_config.gp_model_path is None and \ config.algo_config.num_epochs > 1: - if isinstance(static_env, Quadrotor): - make_quad_plots(test_runs=test_runs, - train_runs=train_runs, - trajectory=ctrl.traj.T, - dir=ctrl.output_dir) - plot_quad_eval(trajs_data['obs'][0], - trajs_data['action'][0], - # trajs_data['current_clipped_action'][0], - ctrl.env, + if isinstance(static_env, Quadrotor): + make_quad_plots(test_runs=test_runs, + train_runs=train_runs, + trajectory=ctrl.traj.T, + dir=ctrl.output_dir) + plot_quad_eval(trajs_data['obs'][0], + trajs_data['action'][0], + # trajs_data['current_clipped_action'][0], + ctrl.env, config.output_dir) - # Close environments static_env.close() static_train_env.close() @@ -211,6 +224,8 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True): print(f'pyb_client: {ctrl.env.PYB_CLIENT}') # def plot_quad_eval(state_stack, input_stack, clipped_action_stack, env, save_path=None): + + def plot_quad_eval(state_stack, input_stack, env, save_path=None): 
'''Plots the input and states to determine success. @@ -235,7 +250,7 @@ def plot_quad_eval(state_stack, input_stack, env, save_path=None): reference = np.tile(reference.reshape(1, model.nx), (plot_length, 1)) # Plot states - fig, axs = plt.subplots(model.nx, figsize=(8, model.nx*1)) + fig, axs = plt.subplots(model.nx, figsize=(8, model.nx * 1)) for k in range(model.nx): axs[k].plot(times, np.array(state_stack).transpose()[k, 0:plot_length], label='actual') axs[k].plot(times, reference.transpose()[k, 0:plot_length], color='r', label='desired') @@ -252,7 +267,7 @@ def plot_quad_eval(state_stack, input_stack, env, save_path=None): plt.savefig(os.path.join(save_path, 'state_trajectories.png')) # Plot inputs - _, axs = plt.subplots(model.nu, figsize=(8, model.nu*1)) + _, axs = plt.subplots(model.nu, figsize=(8, model.nu * 1)) if model.nu == 1: axs = [axs] for k in range(model.nu): @@ -270,9 +285,9 @@ def plot_quad_eval(state_stack, input_stack, env, save_path=None): # plot the figure-eight fig, axs = plt.subplots(1) - axs.plot(np.array(state_stack).transpose()[x_idx, 0:plot_length], + axs.plot(np.array(state_stack).transpose()[x_idx, 0:plot_length], np.array(state_stack).transpose()[z_idx, 0:plot_length], label='actual') - axs.plot(reference.transpose()[x_idx, 0:plot_length], + axs.plot(reference.transpose()[x_idx, 0:plot_length], reference.transpose()[z_idx, 0:plot_length], color='r', label='desired') axs.set_xlabel('x [m]') axs.set_ylabel('z [m]') @@ -285,10 +300,10 @@ def plot_quad_eval(state_stack, input_stack, env, save_path=None): print(f'Plots saved to {save_path}') fig, axs = plt.subplots(1) - axs.plot(np.array(state_stack).transpose()[x_idx, 0:plot_length], + axs.plot(np.array(state_stack).transpose()[x_idx, 0:plot_length], np.array(state_stack).transpose()[y_idx, 0:plot_length], label='actual') axs.plot(reference.transpose()[x_idx, 0:plot_length], - reference.transpose()[y_idx, 0:plot_length], color='r', label='desired') + reference.transpose()[y_idx, 0:plot_length], color='r', label='desired') axs.set_xlabel('x [m]') axs.set_ylabel('y [m]') axs.set_title('State path in x-y plane') @@ -297,9 +312,10 @@ def plot_quad_eval(state_stack, input_stack, env, save_path=None): if save_path is not None: plt.savefig(os.path.join(save_path, 'state_xy_path.png')) - + # plt.show() + def wrap2pi_vec(angle_vec): '''Wraps a vector of angles between -pi and pi. 
diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml index a5009ab8c..099581cf8 100644 --- a/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml +++ b/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml @@ -1,7 +1,8 @@ +algo: ppo algo_config: # model args - hidden_dim: 64 - activation: "relu" + hidden_dim: 128 + activation: tanh # loss args gamma: 0.98 @@ -18,48 +19,15 @@ algo_config: critic_lr: 0.001 # runner args - max_env_steps: 660000 - rollout_batch_size: 5 + max_env_steps: 2640000 + rollout_batch_size: 1 rollout_steps: 660 eval_batch_size: 10 # misc - log_interval: 13200 - save_interval: 660000 + log_interval: 66000 + save_interval: 1320000 num_checkpoints: 0 - eval_interval: 13200 + eval_interval: 66000 eval_save_best: True tensorboard: False - -# algo_config: -# # model args -# hidden_dim: 64 -# activation: "tanh" - -# # loss args -# gamma: 0.98 -# use_gae: True -# gae_lambda: 0.9 -# clip_param: 0.2 -# target_kl: 2.32e-2 -# entropy_coef: 0.09 - -# # optim args -# opt_epochs: 20 -# mini_batch_size: 256 -# actor_lr: 0.0012 -# critic_lr: 0.0012 - -# # runner args -# max_env_steps: 396000 -# rollout_batch_size: 5 -# rollout_steps: 660 -# eval_batch_size: 10 - -# # misc -# log_interval: 13200 -# save_interval: 660000 -# num_checkpoints: 0 -# eval_interval: 13200 -# eval_save_best: True -# tensorboard: False diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml index 7cc1787d7..4110ab495 100644 --- a/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml +++ b/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml @@ -14,56 +14,8 @@ task_config: init_z_dot: 0 init_theta: 0 init_theta_dot: 0 - randomized_init: True - randomized_inertial_prop: True - - init_state_randomization_info: - init_x: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_x_dot: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_z: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_z_dot: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_theta: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_theta_dot: - distrib: 'uniform' - low: -0.05 - high: 0.05 - - inertial_prop_randomization_info: - beta_1: # Nominal: 18.11 - distrib: 'uniform' - low: -4 - high: 4 - beta_2: # Nominal: 3.68 - distrib: 'uniform' - low: -0.7 - high: 0.7 - alpha_1: # Nominal: -140.8 - distrib: 'uniform' - low: -5 - high: 10 - alpha_2: # Nominal: -13.4 - distrib: 'uniform' - low: -3 - high: 3 - alpha_3: # Nominal: 124.8 - distrib: 'uniform' - low: -5 - high: 5 + randomized_init: False + randomized_inertial_prop: False task: traj_tracking task_info: @@ -82,35 +34,17 @@ task_config: obs_goal_horizon: 1 # RL Reward - rew_state_weight: [10., .1, 10., .1, .1, 0.001] - rew_act_weight: [.1, .1] + rew_state_weight: [10, 0.1, 10, 0.1, 0.1, 0.001] + rew_act_weight: [0.1, 0.1] rew_exponential: True - disturbances: - dynamics: - - disturbance_func: white_noise - std: 0.05 - observation: - - disturbance_func: white_noise - std: [5.6e-05, 1.5e-02, 2.9e-05, 8.0e-03, 1.3e-03, 3.6e-01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - action: - - disturbance_func: impulse - magnitude: 0.01 - step_offset: 2 - duration: 1 - decary_rate: 1 - constraints: - constraint_form: default_constraint 
constrained_variable: state - # upper_bounds: [2, 1, 2, 1, 0.2, 2.5] - # lower_bounds: [-2, -1, 0, -1, -0.2, -2.5] + upper_bounds: [ 0.9, 2, 1.45, 2, 0.75, 3] + lower_bounds: [-0.9, -2, 0.55, -2, -0.75, -3] - constraint_form: default_constraint constrained_variable: input - upper_bounds: [0.58212, 0.7] - lower_bounds: [0.09702, -0.7] - # upper_bounds: [0.47628, 0.4] - # lower_bounds: [0.079, -0.4] done_on_out_of_bound: True done_on_violation: False diff --git a/examples/rl/rl_experiment.sh b/examples/rl/rl_experiment.sh index 48a8f9c65..fc8a3269c 100755 --- a/examples/rl/rl_experiment.sh +++ b/examples/rl/rl_experiment.sh @@ -34,11 +34,8 @@ do ./config_overrides/${SYS}/${ALGO}_${SYS}.yaml \ --kv_overrides \ algo_config.training=False \ - task_config.randomized_init=True \ task_config.task_info.num_cycles=2 \ task_config.task_info.ilqr_ref=False \ task_config.task_info.ilqr_traj_data='../lqr/ilqr_ref_traj.npy' \ task_config.noise_scale=${NS} -# --pretrain_path ./Results/Benchmark_data/ilqr_ref/${SYS}_${ALGO}_data/${SEED} done -#done \ No newline at end of file diff --git a/examples/rl/train_rl_model.sh b/examples/rl/train_rl_model.sh index 8e74f590c..1d17f7a55 100755 --- a/examples/rl/train_rl_model.sh +++ b/examples/rl/train_rl_model.sh @@ -54,11 +54,9 @@ do --overrides \ ./config_overrides/${SYS}/${ALGO}_${SYS}.yaml \ ./config_overrides/${SYS}/${SYS}_${TASK}.yaml \ - --output_dir ./Results/${EXP_NAME}/${SYS}_${ALGO}_data/${SEED}/ \ + --output_dir ./results/${EXP_NAME}/${SYS}_${ALGO}_data/${SEED}/ \ --seed ${SEED} \ - --use_gpu \ - --kv_overrides \ - task_config.randomized_init=True + --use_gpu done # Move the newly trained unsafe model. diff --git a/experiments/mpsc/config_overrides/mpsc_acados_quadrotor_2D_attitude.yaml b/experiments/mpsc/config_overrides/mpsc_acados_quadrotor_2D_attitude.yaml new file mode 100644 index 000000000..21146f069 --- /dev/null +++ b/experiments/mpsc/config_overrides/mpsc_acados_quadrotor_2D_attitude.yaml @@ -0,0 +1,32 @@ +safety_filter: mpsc_acados +sf_config: + # LQR controller parameters + q_mpc: [18, 0.1, 18, 0.5, 0.5, 0.0001] + r_mpc: [3., 3.] 
+ + prior_info: + prior_prop: + beta_1: 18.11298 + beta_2: 3.6800 + beta_3: 0 + alpha_1: -140.8 + alpha_2: -13.4 + alpha_3: 124.8 + randomize_prior_prop: False + prior_prop_rand_info: null + + # MPC Parameters + use_acados: True + horizon: 25 + warmstart: True + integration_algo: rk4 + use_terminal_set: False + + # Cost function + cost_function: one_step_cost + mpsc_cost_horizon: 5 + decay_factor: 0.85 + + # Softening + soften_constraints: True + slack_cost: 1000.0 diff --git a/experiments/mpsc/config_overrides/ppo_quadrotor_2D_attitude.yaml b/experiments/mpsc/config_overrides/ppo_quadrotor_2D_attitude.yaml new file mode 100644 index 000000000..c9a8f8f93 --- /dev/null +++ b/experiments/mpsc/config_overrides/ppo_quadrotor_2D_attitude.yaml @@ -0,0 +1,39 @@ +algo: ppo +algo_config: + # model args + hidden_dim: 128 + activation: tanh + + # loss args + gamma: 0.98 + use_gae: True + gae_lambda: 0.92 + clip_param: 0.2 + target_kl: 1.0e-2 + entropy_coef: 0.005 + + # optim args + opt_epochs: 20 + mini_batch_size: 256 + actor_lr: 0.001 + critic_lr: 0.001 + + # runner args + max_env_steps: 2640000 + rollout_batch_size: 1 + rollout_steps: 660 + eval_batch_size: 10 + + # misc + log_interval: 66000 + save_interval: 1320000 + num_checkpoints: 0 + eval_interval: 66000 + eval_save_best: True + tensorboard: False + + # safety filter + filter_train_actions: False + penalize_sf_diff: False + sf_penalty: 1 + use_safe_reset: False diff --git a/experiments/mpsc/config_overrides/quadrotor_2D_attitude_tracking.yaml b/experiments/mpsc/config_overrides/quadrotor_2D_attitude_tracking.yaml new file mode 100644 index 000000000..4e127affa --- /dev/null +++ b/experiments/mpsc/config_overrides/quadrotor_2D_attitude_tracking.yaml @@ -0,0 +1,48 @@ +task_config: + info_in_reset: True + ctrl_freq: 60 + pyb_freq: 60 + physics: dyn_si + quad_type: 4 + + init_state: + init_x: 0 + init_x_dot: 0 + init_z: 1.0 + init_z_dot: 0 + init_theta: 0 + init_theta_dot: 0 + randomized_init: False + randomized_inertial_prop: False + + task: traj_tracking + task_info: + trajectory_type: figure8 + num_cycles: 2 + trajectory_plane: 'xz' + trajectory_position_offset: [0, 1.] + trajectory_scale: 1.0 + + inertial_prop: + M: 0.033 + Iyy: 1.4e-05 + + episode_len_sec: 11 + cost: rl_reward + obs_goal_horizon: 1 + + # RL Reward + rew_state_weight: [10, 0.1, 10, 0.1, 0.1, 0.001] + rew_act_weight: [0.1, 0.1] + rew_exponential: True + + constraints: + - constraint_form: default_constraint + constrained_variable: state + upper_bounds: [ 0.9, 2, 1.45, 2, 0.75, 3] + lower_bounds: [-0.9, -2, 0.55, -2, -0.75, -3] + - constraint_form: default_constraint + constrained_variable: input + + done_on_out_of_bound: True + done_on_violation: False diff --git a/experiments/mpsc/mpsc_experiment.py b/experiments/mpsc/mpsc_experiment.py new file mode 100644 index 000000000..a82b0329b --- /dev/null +++ b/experiments/mpsc/mpsc_experiment.py @@ -0,0 +1,173 @@ +'''This script tests the MPSC safety filter implementation.''' + +import pickle +import shutil +from functools import partial + +import matplotlib.pyplot as plt +import numpy as np + +from safe_control_gym.experiments.base_experiment import BaseExperiment, MetricExtractor +from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function +from safe_control_gym.utils.configuration import ConfigFactory +from safe_control_gym.utils.registration import make + + +def run(plot=False, training=False, model='ppo'): + '''Main function to run MPSC experiments. 
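+
+    Args:
+        plot (bool): Whether to plot the uncertified vs. certified trajectories.
+        training (bool): Whether to learn and save the MPSC parameters instead of running the evaluation.
+        model (str): Name of the pretrained RL model to load from ./models/rl_models/.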
+ + Returns: + X_GOAL (np.ndarray): The goal (stabilization or reference trajectory) of the experiment. + uncert_results (dict): The results of the uncertified experiment. + uncert_metrics (dict): The metrics of the uncertified experiment. + cert_results (dict): The results of the certified experiment. + cert_metrics (dict): The metrics of the certified experiment. + ''' + + # Create the configuration dictionary. + fac = ConfigFactory() + config = fac.merge() + config.algo_config['training'] = False + config.task_config['done_on_violation'] = False + config.task_config['randomized_init'] = False + + system = 'quadrotor_2D_attitude' + + # Create an environment + env_func = partial(make, + config.task, + **config.task_config) + env = env_func() + + config.task_config.constraints[0].upper_bounds = [0.899, 1.99, 1.449, 1.99, 0.749, 2.99] + config.task_config.constraints[0].lower_bounds = [-0.899, -1.99, 0.551, -1.99, -0.749, -2.99] + config.task_config.constraints[1].upper_bounds = [0.59, 0.436] + config.task_config.constraints[1].lower_bounds = [0.113, -0.436] + env_func = partial(make, + config.task, + **config.task_config) + + # Setup controller. + ctrl = make(config.algo, + env_func, + **config.algo_config, + output_dir='./temp') + + if config.algo in ['ppo', 'sac']: + # Load state_dict from trained. + ctrl.load(f'./models/rl_models/{model}/model_best.pt') + + # Remove temporary files and directories + shutil.rmtree('./temp', ignore_errors=True) + + # Run without safety filter + experiment = BaseExperiment(env, ctrl) + uncert_results, uncert_metrics = experiment.run_evaluation(n_episodes=1) + ctrl.reset() + + # Setup MPSC. + safety_filter = make(config.safety_filter, + env_func, + **config.sf_config) + safety_filter.reset() + + if config.sf_config.cost_function == Cost_Function.PRECOMPUTED_COST: + safety_filter.cost_function.uncertified_controller = ctrl + safety_filter.cost_function.output_dir = '.' 
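+    # Either learn and save the MPSC parameters (training=True) or load previously saved ones
+    # from ./models/mpsc_parameters/ before running the certified evaluation.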
+ + if training is True: + train_env = env_func(randomized_init=True, + init_state=None, + normalized_rl_action_space=False, + ) + safety_filter.learn(env=train_env) + safety_filter.save(path=f'./models/mpsc_parameters/{config.safety_filter}_{system}.pkl') + raise SystemExit + else: + safety_filter.load(path=f'./models/mpsc_parameters/{config.safety_filter}_{system}.pkl') + + # Run with safety filter + experiment = BaseExperiment(env, ctrl, safety_filter=safety_filter) + cert_results, cert_metrics = experiment.run_evaluation(n_episodes=1) + experiment.close() + safety_filter.close() + + if plot is True: + fig = plt.figure() + ax = fig.add_subplot(1, 1, 1) + ax.plot(uncert_results['state'][0][:, 0], uncert_results['state'][0][:, 2], label='Uncertified', color='red') + ax.plot(cert_results['state'][0][:, 0], cert_results['state'][0][:, 2], label='Certified', color='green') + ax.plot(env.X_GOAL[:, 0], env.X_GOAL[:, 2], label='Reference', color='black', linestyle='dashdot') + rec1 = plt.Rectangle((0.9, 0), 2, 2, color='#f1d6d6') + rec2 = plt.Rectangle((-1.9, 0), 1, 2, color='#f1d6d6') + ax.add_patch(rec1) + ax.add_patch(rec2) + rec3 = plt.Rectangle((-0.9, 1.45), 0.975 * 2, 2, color='#f1d6d6') + rec4 = plt.Rectangle((-0.9, -0.45), 0.975 * 2, 1, color='#f1d6d6') + ax.add_patch(rec3) + ax.add_patch(rec4) + plt.xlim(-1.1, 1.1) + plt.ylim(0.45, 1.55) + plt.xlabel('x [m]') + plt.ylabel('z [m]') + plt.legend() + plt.show() + + elapsed_time_uncert = uncert_results['timestamp'][0][-1] - uncert_results['timestamp'][0][0] + elapsed_time_cert = cert_results['timestamp'][0][-1] - cert_results['timestamp'][0][0] + + mpsc_results = cert_results['safety_filter_data'][0] + corrections = mpsc_results['correction'][0] * 10.0 > np.linalg.norm(cert_results['current_physical_action'][0] - safety_filter.U_EQ[0], axis=1) + corrections = np.append(corrections, False) + + print('Total Uncertified (s):', elapsed_time_uncert) + print('Total Certified Time (s):', elapsed_time_cert) + print('Number of Corrections:', np.sum(corrections)) + print('Sum of Corrections:', np.linalg.norm(mpsc_results['correction'][0])) + print('Max Correction:', np.max(np.abs(mpsc_results['correction'][0]))) + print('Number of Feasible Iterations:', np.sum(mpsc_results['feasible'][0])) + print('Total Number of Iterations:', uncert_metrics['average_length']) + print('Total Number of Certified Iterations:', cert_metrics['average_length']) + print('Number of Violations:', uncert_metrics['average_constraint_violation']) + print('Number of Certified Violations:', cert_metrics['average_constraint_violation']) + + return env.X_GOAL, uncert_results, uncert_metrics, cert_results, cert_metrics + + +def run_multiple_models(plot, all_models): + '''Runs all models at every saved starting point.''' + + fac = ConfigFactory() + config = fac.merge() + + for model in all_models: + print(model) + for i in range(25 if not plot else 1): + X_GOAL, uncert_results, _, cert_results, _ = run(plot=plot, training=False, model=model) + if i == 0: + all_uncert_results, all_cert_results = uncert_results, cert_results + else: + for key in all_cert_results.keys(): + if key in all_uncert_results: + all_uncert_results[key].append(uncert_results[key][0]) + all_cert_results[key].append(cert_results[key][0]) + + met = MetricExtractor() + uncert_metrics = met.compute_metrics(data=all_uncert_results) + cert_metrics = met.compute_metrics(data=all_cert_results) + + all_results = {'uncert_results': all_uncert_results, + 'uncert_metrics': uncert_metrics, + 'cert_results': 
all_cert_results, + 'cert_metrics': cert_metrics, + 'config': config, + 'X_GOAL': X_GOAL} + + if not plot: + with open(f'./results_mpsc/{model}.pkl', 'wb') as f: + pickle.dump(all_results, f) + + +if __name__ == '__main__': + # run(plot=True, training=False, model='none') + run_multiple_models(plot=True, all_models=['mpsf']) diff --git a/experiments/mpsc/mpsc_experiment.sh b/experiments/mpsc/mpsc_experiment.sh new file mode 100755 index 000000000..7b0bf4358 --- /dev/null +++ b/experiments/mpsc/mpsc_experiment.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +SYS='quadrotor_2D_attitude' +TASK='tracking' +ALGO='ppo' + +SAFETY_FILTER='mpsc_acados' +# MPSC_COST='one_step_cost' +MPSC_COST='precomputed_cost' +MPSC_COST_HORIZON=25 +DECAY_FACTOR=1 + +python3 ./mpsc_experiment.py \ + --task quadrotor \ + --algo ${ALGO} \ + --safety_filter ${SAFETY_FILTER} \ + --overrides \ + ./config_overrides/${SYS}_${TASK}.yaml \ + ./config_overrides/${ALGO}_${SYS}.yaml \ + ./config_overrides/${SAFETY_FILTER}_${SYS}.yaml \ + --kv_overrides \ + sf_config.cost_function=${MPSC_COST} \ + sf_config.mpsc_cost_horizon=${MPSC_COST_HORIZON} \ + sf_config.decay_factor=${DECAY_FACTOR} diff --git a/experiments/mpsc/plotting_results.py b/experiments/mpsc/plotting_results.py new file mode 100644 index 000000000..d2e9ecd89 --- /dev/null +++ b/experiments/mpsc/plotting_results.py @@ -0,0 +1,507 @@ +'''This script analyzes and plots the results from MPSC experiments.''' + +import pickle +import sys + +import matplotlib.pyplot as plt +import numpy as np + +from safe_control_gym.experiments.base_experiment import MetricExtractor +from safe_control_gym.safety_filters.mpsc.mpsc_utils import get_discrete_derivative +from safe_control_gym.utils.plotting import load_from_logs + +plot = True # Saves figure if False + +U_EQ = np.array([0.3, 0]) + +met = MetricExtractor() +met.verbose = False + + +def load_all_models(system, task, algo): + '''Loads the results of every experiment. + + Args: + system (str): The system to be plotted. + task (str): The task to be plotted (either 'stab' or 'track'). + algo (str): The controller to be plotted. + + Returns: + all_results (dict): A dictionary containing all the results. + ''' + + all_results = {} + + for model in ordered_models: + with open(f'./results_mpsc/{model}.pkl', 'rb') as f: + all_results[model] = pickle.load(f) + + return all_results + + +def extract_magnitude_of_corrections(results_data): + '''Extracts the magnitude of corrections from an experiment's data. + + Args: + results_data (dict): A dictionary containing all the data from the desired experiment. + + Returns: + magn_of_corrections (list): The list of magnitude of corrections for all experiments. + ''' + + magn_of_corrections = [np.linalg.norm(mpsc_results['correction'][0]) for mpsc_results in results_data['cert_results']['safety_filter_data']] + return magn_of_corrections + + +def extract_max_correction(results_data): + '''Extracts the max correction from an experiment's data. + + Args: + results_data (dict): A dictionary containing all the data from the desired experiment. + + Returns: + max_corrections (list): The list of max corrections for all experiments. + ''' + max_corrections = [np.max(np.abs(mpsc_results['correction'][0])) for mpsc_results in results_data['cert_results']['safety_filter_data']] + + return max_corrections + + +def extract_number_of_corrections(results_data): + '''Extracts the number of corrections from an experiment's data. 
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+
+    Returns:
+        num_corrections (list): The list of the number of corrections for all experiments.
+    '''
+    num_corrections = [np.sum(mpsc_results['correction'][0] * 10.0 > np.linalg.norm(results_data['cert_results']['current_clipped_action'][i] - U_EQ, axis=1)) for i, mpsc_results in enumerate(results_data['cert_results']['safety_filter_data'])]
+    return num_corrections
+
+
+def extract_feasible_iterations(results_data):
+    '''Extracts the number of feasible iterations from an experiment's data.
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+
+    Returns:
+        feasible_iterations (list): The list of the number of feasible iterations for all experiments.
+    '''
+    feasible_iterations = [np.sum(mpsc_results['feasible'][0]) for mpsc_results in results_data['cert_results']['safety_filter_data']]
+    return feasible_iterations
+
+
+def extract_rmse(results_data, certified=True):
+    '''Extracts the RMSEs from an experiment's data.
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+        certified (bool): Whether to extract the certified data or uncertified data.
+
+    Returns:
+        rmse (list): The list of RMSEs for all experiments.
+    '''
+    if certified:
+        met.data = results_data['cert_results']
+        rmse = np.asarray(met.get_episode_rmse())
+    else:
+        met.data = results_data['uncert_results']
+        rmse = np.asarray(met.get_episode_rmse())
+    return rmse
+
+
+def extract_length(results_data, certified=True):
+    '''Extracts the lengths from an experiment's data.
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+        certified (bool): Whether to extract the certified data or uncertified data.
+
+    Returns:
+        length (list): The list of lengths for all experiments.
+    '''
+    if certified:
+        met.data = results_data['cert_results']
+        length = np.asarray(met.get_episode_lengths())
+    else:
+        met.data = results_data['uncert_results']
+        length = np.asarray(met.get_episode_lengths())
+    return length
+
+
+def extract_simulation_time(results_data, certified=True):
+    '''Extracts the simulation time from an experiment's data.
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+        certified (bool): Whether to extract the certified data or uncertified data.
+
+    Returns:
+        sim_time (list): The list of simulation times for all experiments.
+    '''
+    if certified:
+        sim_time = [timestamp[-1] - timestamp[0] for timestamp in results_data['cert_results']['timestamp']]
+    else:
+        sim_time = [timestamp[-1] - timestamp[0] for timestamp in results_data['uncert_results']['timestamp']]
+
+    return sim_time
+
+
+def extract_constraint_violations(results_data, certified=True):
+    '''Extracts the number of constraint violations from an experiment's data.
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+        certified (bool): Whether to extract the certified data or uncertified data.
+
+    Returns:
+        num_violations (list): The list of number of constraint violations for all experiments.
+    '''
+    if certified:
+        met.data = results_data['cert_results']
+        num_violations = np.asarray(met.get_episode_constraint_violation_steps())
+    else:
+        met.data = results_data['uncert_results']
+        num_violations = np.asarray(met.get_episode_constraint_violation_steps())
+
+    return num_violations
+
+
+def extract_rate_of_change(results_data, certified=True, order=1, mode='input'):
+    '''Extracts the rate of change of a signal from an experiment's data.
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+        certified (bool): Whether to extract the certified data or uncertified data.
+        order (int): Either 1 or 2, denoting the order of the derivative.
+        mode (string): Either 'input' or 'correction', denoting which signal to use.
+
+    Returns:
+        roc (list): The list of rate of changes.
+    '''
+    n = min(results_data['cert_results']['current_clipped_action'][0].shape)
+
+    if mode == 'input':
+        if certified:
+            all_signals = [actions - U_EQ for actions in results_data['cert_results']['current_clipped_action']]
+        else:
+            all_signals = [actions - U_EQ for actions in results_data['uncert_results']['current_clipped_action']]
+    elif mode == 'correction':
+        all_signals = [np.squeeze(mpsc_results['uncertified_action'][0]) - np.squeeze(mpsc_results['certified_action'][0]) for mpsc_results in results_data['cert_results']['safety_filter_data']]
+
+    total_derivatives = []
+    for signal in all_signals:
+        if n == 1:
+            ctrl_freq = 15
+            if mode == 'correction':
+                signal = np.atleast_2d(signal).T
+        elif n > 1:
+            ctrl_freq = 50
+        derivative = get_discrete_derivative(signal, ctrl_freq)
+        if order == 2:
+            derivative = get_discrete_derivative(derivative, ctrl_freq)
+        total_derivatives.append(np.linalg.norm(derivative, 'fro'))
+
+    return total_derivatives
+
+
+def extract_reward(results_data, certified):
+    '''Extracts the episode returns from an experiment's data.
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+        certified (bool): Whether to extract the certified data or uncertified data.
+
+    Returns:
+        returns (list): The list of episode returns for all experiments.
+    '''
+    if certified:
+        met.data = results_data['cert_results']
+        returns = np.asarray(met.get_episode_returns())
+    else:
+        met.data = results_data['uncert_results']
+        returns = np.asarray(met.get_episode_returns())
+
+    return returns
+
+
+def extract_failed(results_data, certified):
+    '''Extracts the percent failed from an experiment's data.
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+        certified (bool): Whether to extract the certified data or uncertified data.
+
+    Returns:
+        failed (list): The percent failed.
+    '''
+    if certified:
+        data = results_data['cert_results']
+    else:
+        data = results_data['uncert_results']
+
+    failed = [data['info'][i][-1]['out_of_bounds'] for i in range(len(data['info']))]
+
+    return [np.mean(failed)]
+
+
+def plot_model_comparisons(system, task, algo, data_extractor):
+    '''Plots a box-plot comparison of the data extracted by data_extractor for every model.
+
+    Args:
+        system (str): The system to be plotted.
+        task (str): The task to be plotted (either 'stab' or 'track').
+        algo (str): The controller to be plotted.
+        data_extractor (func): The function which extracts the desired data.
+ ''' + + all_results = load_all_models(system, task, algo) + + fig = plt.figure(figsize=(16.0, 10.0)) + ax = fig.add_subplot(111) + + labels = ordered_models + + data = [] + + for model in ordered_models: + exp_data = all_results[model] + data.append(data_extractor(exp_data)) + + ylabel = data_extractor.__name__.replace('extract_', '').replace('_', ' ').title() + ax.set_ylabel(ylabel, weight='bold', fontsize=45, labelpad=10) + + x = np.arange(1, len(labels) + 1) + ax.set_xticks(x, labels, weight='bold', fontsize=15, rotation=30, ha='right') + + medianprops = dict(linestyle='--', linewidth=2.5, color='black') + bplot = ax.boxplot(data, patch_artist=True, labels=labels, medianprops=medianprops, widths=[0.75] * len(labels), showfliers=False) + + for patch, color in zip(bplot['boxes'], colors.values()): + patch.set_facecolor(color) + + fig.tight_layout() + ax.set_ylim(ymin=0) + + ax.yaxis.grid(True) + + if plot is True: + plt.show() + else: + image_suffix = data_extractor.__name__.replace('extract_', '') + fig.savefig(f'./results_mpsc/{image_suffix}.png', dpi=300) + plt.close() + + +def normalize_actions(actions): + '''Normalizes an array of actions. + + Args: + actions (ndarray): The actions to be normalized. + + Returns: + normalized_actions (ndarray): The normalized actions. + ''' + if system_name == 'cartpole': + action_scale = 10.0 + normalized_actions = actions / action_scale + elif system_name == 'quadrotor_2D': + hover_thrust = 0.1323 + norm_act_scale = 0.1 + normalized_actions = (actions / hover_thrust - 1.0) / norm_act_scale + else: + hover_thrust = 0.06615 + norm_act_scale = 0.1 + normalized_actions = (actions / hover_thrust - 1.0) / norm_act_scale + + return normalized_actions + + +def plot_all_logs(system, task, algo): + '''Plots comparative plots of all the logs. + + Args: + system (str): The system to be plotted. + task (str): The task to be plotted (either 'stab' or 'track'). + algo (str): The controller to be plotted. + ''' + all_results = {} + + for model in ordered_models: + all_results[model] = [] + all_results[model].append(load_from_logs(f'./models/rl_models/{model}/logs/')) + + for key in all_results[ordered_models[0]][0].keys(): + if key == 'stat_eval/ep_return': + plot_log(key, all_results) + if key == 'stat/constraint_violation': + plot_log(key, all_results) + + +def plot_log(key, all_results): + '''Plots a comparative plot of the log 'key'. + + Args: + key (str): The name of the log to be plotted. + all_results (dict): A dictionary of all the logged results for all models. 
+ ''' + fig = plt.figure(figsize=(16.0, 10.0)) + ax = fig.add_subplot(111) + + labels = ordered_models + + for model, label in zip(ordered_models, labels): + x = all_results[model][0][key][1] / 1000 + all_data = np.array([values[key][3] for values in all_results[model]]) + ax.plot(x, np.mean(all_data, axis=0), label=label, color=colors[model]) + ax.fill_between(x, np.min(all_data, axis=0), np.max(all_data, axis=0), alpha=0.3, edgecolor=colors[model], facecolor=colors[model]) + + ax.set_ylabel(key, weight='bold', fontsize=45, labelpad=10) + ax.set_xlabel('Training Episodes') + ax.legend() + + fig.tight_layout() + ax.yaxis.grid(True) + + if plot is True: + plt.show() + else: + image_suffix = key.replace('/', '__') + fig.savefig(f'./results_mpsc/{image_suffix}.png', dpi=300) + plt.close() + + +def benchmark_plot(system, task, algo): + all_results = load_all_models(system, task, algo) + X_GOAL = all_results['mpsf']['X_GOAL'] + + uncert = all_results['none']['uncert_results'] + mpsf = all_results['mpsf']['cert_results'] + none = all_results['none']['cert_results'] + mpc = all_results['mpc_acados']['trajs_data'] + + for i in [0]: + print('Uncert') + met.data = uncert + print('num_violations', calculate_state_violations(uncert, i)) + print('exp_return', np.asarray(met.get_episode_returns())[i]) + + print('\nNone') + met.data = none + print('num_violations', calculate_state_violations(none, i)) + print('exp_return', np.asarray(met.get_episode_returns())[i]) + + print('\nMPSF') + met.data = mpsf + print('num_violations', calculate_state_violations(mpsf, i)) + print('exp_return', np.asarray(met.get_episode_returns())[i]) + print('---------') + + print('\nMPC') + met.data = mpc + print('num_violations', calculate_state_violations(mpc, i)) + print('exp_return', np.asarray(met.get_episode_returns())[i]) + print('---------') + + fig = plt.figure() + ax = fig.add_subplot(1, 1, 1) + ax.plot(uncert['state'][i][:, 0], uncert['state'][i][:, 2], label='Uncertified', color='red') + ax.plot(none['state'][i][:, 0], none['state'][i][:, 2], label='Certified (Std.)', color='cornflowerblue') + ax.plot(mpsf['state'][i][:, 0], mpsf['state'][i][:, 2], label='Certified (Ours)', color='forestgreen') + ax.plot(mpc['state'][i][:, 0], mpc['state'][i][:, 2], label='MPC', color='plum') + ax.plot(X_GOAL[:, 0], X_GOAL[:, 2], label='Reference', color='black', linestyle='dashdot') + rec1 = plt.Rectangle((0.9, 0), 2, 2, color='#f1d6d6') + rec2 = plt.Rectangle((-1.9, 0), 1, 2, color='#f1d6d6') + ax.add_patch(rec1) + ax.add_patch(rec2) + rec3 = plt.Rectangle((-0.9, 1.45), 0.975 * 2, 2, color='#f1d6d6') + rec4 = plt.Rectangle((-0.9, -0.45), 0.975 * 2, 1, color='#f1d6d6') + ax.add_patch(rec3) + ax.add_patch(rec4) + plt.xlim(-1.1, 1.1) + plt.ylim(0.45, 1.55) + plt.xlabel('x [m]') + plt.ylabel('z [m]') + plt.legend() + plt.show() + + +def calculate_state_violations(data, i): + states = data['state'][i] + num_viols = np.sum(np.any(states[:, [0, 2]] > [0.9, 1.45], axis=1) | np.any(states[:, [0, 2]] < [-0.9, 0.55], axis=1)) + return num_viols + + +if __name__ == '__main__': + ordered_models = ['none', 'mpsf', 'mpc_acados'] + + colors = { + 'mpsf': 'royalblue', + 'none': 'plum', + } + + def extract_rate_of_change_of_inputs(results_data, certified=True): + return extract_rate_of_change(results_data, certified, order=1, mode='input') + + def extract_roc_cert(results_data, certified=True): + return extract_rate_of_change_of_inputs(results_data, certified) + + def extract_roc_uncert(results_data, certified=False): + return 
extract_rate_of_change_of_inputs(results_data, certified) + + def extract_rmse_cert(results_data, certified=True): + return extract_rmse(results_data, certified) + + def extract_rmse_uncert(results_data, certified=False): + return extract_rmse(results_data, certified) + + def extract_constraint_violations_cert(results_data, certified=True): + return extract_constraint_violations(results_data, certified) + + def extract_constraint_violations_uncert(results_data, certified=False): + return extract_constraint_violations(results_data, certified) + + def extract_reward_cert(results_data, certified=True): + return extract_reward(results_data, certified) + + def extract_reward_uncert(results_data, certified=False): + return extract_reward(results_data, certified) + + def extract_failed_cert(results_data, certified=True): + return extract_failed(results_data, certified) + + def extract_failed_uncert(results_data, certified=False): + return extract_failed(results_data, certified) + + def extract_length_cert(results_data, certified=True): + return extract_length(results_data, certified) + + def extract_length_uncert(results_data, certified=False): + return extract_length(results_data, certified) + + system_name = 'quadrotor_2D_attitude' + task_name = 'track' + algo_name = 'ppo' + if len(sys.argv) == 4: + system_name = sys.argv[1] + task_name = sys.argv[2] + algo_name = sys.argv[3] + + benchmark_plot(system_name, task_name, algo_name) + # plot_all_logs(system_name, task_name, algo_name) + # plot_model_comparisons(system_name, task_name, algo_name, extract_magnitude_of_corrections) + # plot_model_comparisons(system_name, task_name, algo_name, extract_max_correction) + # plot_model_comparisons(system_name, task_name, algo_name, extract_roc_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_roc_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_rmse_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_rmse_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_constraint_violations_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_constraint_violations_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_number_of_corrections) + # plot_model_comparisons(system_name, task_name, algo_name, extract_length_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_length_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_reward_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_reward_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_failed_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_failed_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_feasible_iterations) diff --git a/experiments/mpsc/train_model.sh b/experiments/mpsc/train_model.sh new file mode 100755 index 000000000..5fd1696b0 --- /dev/null +++ b/experiments/mpsc/train_model.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +SYS='quadrotor_2D_attitude' +TASK='tracking' +ALGO='ppo' + +SAFETY_FILTER='mpsc_acados' +MPSC_COST='one_step_cost' +FILTER=True +SF_PEN=0.03 + +if [ "$FILTER" == 'True' ]; then + TAG=mpsf +else + TAG=none +fi + +# Train the unsafe controller/agent. 
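+# With FILTER=True, training actions are passed through the MPSC safety filter and the
+# filtered/unfiltered action difference is penalized (sf_penalty); the model is saved under
+# the 'mpsf' tag. With FILTER=False, a plain unfiltered agent is trained under the 'none' tag.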
+python3 train_rl.py \ + --task quadrotor \ + --algo ${ALGO} \ + --safety_filter ${SAFETY_FILTER} \ + --overrides \ + ./config_overrides/${SYS}_${TASK}.yaml \ + ./config_overrides/${ALGO}_${SYS}.yaml \ + ./config_overrides/${SAFETY_FILTER}_${SYS}.yaml \ + --output_dir ./models/rl_models/${TAG}/ \ + --seed 2 \ + --kv_overrides \ + sf_config.cost_function=${MPSC_COST} \ + sf_config.soften_constraints=True \ + algo_config.filter_train_actions=${FILTER} \ + algo_config.use_safe_reset=False \ + algo_config.penalize_sf_diff=${FILTER} \ + algo_config.sf_penalty=${SF_PEN} \ diff --git a/experiments/mpsc/train_rl.py b/experiments/mpsc/train_rl.py new file mode 100644 index 000000000..a3f79014a --- /dev/null +++ b/experiments/mpsc/train_rl.py @@ -0,0 +1,94 @@ +'''Template training/plotting/testing script.''' + +import os +import shutil +import time +from functools import partial + +import munch +import yaml + +from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function +from safe_control_gym.utils.configuration import ConfigFactory +from safe_control_gym.utils.plotting import plot_from_logs +from safe_control_gym.utils.registration import make +from safe_control_gym.utils.utils import mkdirs, set_device_from_config, set_seed_from_config + + +def train(): + '''Training template. + + TODO: Add restore functionality + ''' + # Create the configuration dictionary. + fac = ConfigFactory() + config = fac.merge() + config.algo_config['training'] = True + + shutil.rmtree(config.output_dir, ignore_errors=True) + + system = 'quadrotor_2D_attitude' + + set_seed_from_config(config) + set_device_from_config(config) + + # Define function to create task/env. + env_func = partial(make, + config.task, + output_dir=config.output_dir, + **config.task_config + ) + + # Create the controller/control_agent. + ctrl = make(config.algo, + env_func, + checkpoint_path=os.path.join(config.output_dir, 'model_latest.pt'), + output_dir=config.output_dir, + **config.algo_config) + ctrl.reset() + + # Setup MPSC. + if config.algo in ['ppo', 'sac']: + safety_filter = make(config.safety_filter, + env_func, + **config.sf_config) + safety_filter.reset() + + if config.sf_config.cost_function == Cost_Function.PRECOMPUTED_COST: + safety_filter.cost_function.uncertified_controller = ctrl + safety_filter.cost_function.output_dir = '.' + + safety_filter.load(path=f'./models/mpsc_parameters/{config.safety_filter}_{system}.pkl') + + ctrl.safety_filter = safety_filter + + # Training. + start_time = time.time() + ctrl.learn() + config['logging'] = {'total_learning_time': time.time() - start_time} + ctrl.close() + print('Training done.') + + with open(os.path.join(config.output_dir, 'config.yaml'), 'w', encoding='UTF-8') as file: + yaml.dump(munch.unmunchify(config), file, default_flow_style=False) + + make_plots(config) + + +def make_plots(config): + '''Produces plots for logged stats during training. + Usage + * use with `--func plot` and `--restore {dir_path}` where `dir_path` is + the experiment folder containing the logs. + * save figures under `dir_path/plots/`. + ''' + # Define source and target log locations. 
+ log_dir = os.path.join(config.output_dir, 'logs') + plot_dir = os.path.join(config.output_dir, 'plots') + mkdirs(plot_dir) + plot_from_logs(log_dir, plot_dir, window=3) + print('Plotting done.') + + +if __name__ == '__main__': + train() diff --git a/safe_control_gym/controllers/ppo/ppo.py b/safe_control_gym/controllers/ppo/ppo.py index 82579603e..c70fc5b0d 100644 --- a/safe_control_gym/controllers/ppo/ppo.py +++ b/safe_control_gym/controllers/ppo/ppo.py @@ -1,4 +1,4 @@ -"""Proximal Policy Optimization (PPO) +'''Proximal Policy Optimization (PPO) Based on: * https://github.com/openai/spinningup/blob/master/spinup/algos/pytorch/ppo/ppo.py @@ -10,7 +10,7 @@ * pytorch-a2c-ppo-acktr-gail - https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail * openai spinning up - ppo - https://github.com/openai/spinningup/tree/master/spinup/algos/pytorch/ppo * stable baselines3 - ppo - https://github.com/DLR-RM/stable-baselines3/tree/master/stable_baselines3/ppo -""" +''' import os import time @@ -30,7 +30,7 @@ class PPO(BaseController): - """Proximal policy optimization.""" + '''Proximal policy optimization.''' def __init__(self, env_func, @@ -40,6 +40,10 @@ def __init__(self, use_gpu=False, seed=0, **kwargs): + self.filter_train_actions = False + self.penalize_sf_diff = False + self.sf_penalty = 1 + self.use_safe_reset = False super().__init__(env_func, training, checkpoint_path, output_dir, use_gpu, seed, **kwargs) # Task. @@ -49,6 +53,7 @@ def __init__(self, self.env = VecRecordEpisodeStatistics(self.env, self.deque_size) self.eval_env = env_func(seed=seed * 111) self.eval_env = RecordEpisodeStatistics(self.eval_env, self.deque_size) + self.model = self.get_prior(self.eval_env, self.prior_info) else: # Testing only. self.env = env_func() @@ -84,8 +89,11 @@ def __init__(self, use_tensorboard = False self.logger = ExperimentLogger(output_dir, log_file_out=log_file_out, use_tensorboard=use_tensorboard) + # Adding safety filter + self.safety_filter = None + def reset(self): - """Do initializations for training or evaluation.""" + '''Do initializations for training or evaluation.''' if self.training: # set up stats tracking self.env.add_tracker('constraint_violation', 0) @@ -94,7 +102,9 @@ def reset(self): self.eval_env.add_tracker('mse', 0, mode='queue') self.total_steps = 0 - obs, _ = self.env.reset() + obs, info = self.env_reset(self.env, self.use_safe_reset) + self.info = info['n'][0] + self.true_obs = obs self.obs = self.obs_normalizer(obs) else: # Add episodic stats to be tracked. @@ -103,16 +113,16 @@ def reset(self): self.env.add_tracker('mse', 0, mode='queue') def close(self): - """Shuts down and cleans up lingering resources.""" + '''Shuts down and cleans up lingering resources.''' self.env.close() if self.training: self.eval_env.close() self.logger.close() def save(self, - path + path, ): - """Saves model params and experiment state to checkpoint path.""" + '''Saves model params and experiment state to checkpoint path.''' path_dir = os.path.dirname(path) os.makedirs(path_dir, exist_ok=True) state_dict = { @@ -131,10 +141,10 @@ def save(self, torch.save(state_dict, path) def load(self, - path + path, ): - """Restores model and experiment given checkpoint path.""" - state = torch.load(path) + '''Restores model and experiment given checkpoint path.''' + state = torch.load(path, map_location=torch.device('cpu')) # Restore policy. 
self.agent.load_state_dict(state['agent']) self.obs_normalizer.load_state_dict(state['obs_normalizer']) @@ -151,45 +161,26 @@ def learn(self, env=None, **kwargs ): - """Performs learning (pre-training, training, fine-tuning, etc.).""" - - # Initial Evaluation. - eval_results = self.run(env=self.eval_env, n_episodes=self.eval_batch_size) - self.logger.info('Eval | ep_lengths {:.2f} +/- {:.2f} | ep_return {:.3f} +/- {:.3f}'.format( - eval_results['ep_lengths'].mean(), - eval_results['ep_lengths'].std(), - eval_results['ep_returns'].mean(), - eval_results['ep_returns'].std())) - - if self.num_checkpoints > 0: - step_interval = np.linspace(0, self.max_env_steps, self.num_checkpoints) - interval_save = np.zeros_like(step_interval, dtype=bool) + '''Performs learning (pre-training, training, fine-tuning, etc).''' while self.total_steps < self.max_env_steps: results = self.train_step() # Checkpoint. - if (self.total_steps >= self.max_env_steps - or (self.save_interval and self.total_steps % self.save_interval == 0)): + if self.total_steps >= self.max_env_steps or (self.save_interval and self.total_steps % self.save_interval == 0): # Latest/final checkpoint. self.save(self.checkpoint_path) self.logger.info(f'Checkpoint | {self.checkpoint_path}') - path = os.path.join(self.output_dir, 'checkpoints', 'model_{}.pt'.format(self.total_steps)) + if self.num_checkpoints and self.total_steps % (self.max_env_steps // self.num_checkpoints) == 0: + # Intermediate checkpoint. + path = os.path.join(self.output_dir, 'checkpoints', f'model_{self.total_steps}.pt') self.save(path) - if self.num_checkpoints > 0: - interval_id = np.argmin(np.abs(np.array(step_interval) - self.total_steps)) - if interval_save[interval_id] is False: - # Intermediate checkpoint. - path = os.path.join(self.output_dir, 'checkpoints', f'model_{self.total_steps}.pt') - self.save(path) - interval_save[interval_id] = True # Evaluation. if self.eval_interval and self.total_steps % self.eval_interval == 0: eval_results = self.run(env=self.eval_env, n_episodes=self.eval_batch_size) results['eval'] = eval_results - self.logger.info('Eval | ep_lengths {:.2f} +/- {:.2f} | ep_return {:.3f} +/- {:.3f}'.format( - eval_results['ep_lengths'].mean(), - eval_results['ep_lengths'].std(), - eval_results['ep_returns'].mean(), - eval_results['ep_returns'].std())) + self.logger.info('Eval | ep_lengths {:.2f} +/- {:.2f} | ep_return {:.3f} +/- {:.3f}'.format(eval_results['ep_lengths'].mean(), + eval_results['ep_lengths'].std(), + eval_results['ep_returns'].mean(), + eval_results['ep_returns'].std())) # Save best model. eval_score = eval_results['ep_returns'].mean() eval_best_score = getattr(self, 'eval_best_score', -np.infty) @@ -200,8 +191,8 @@ def learn(self, if self.log_interval and self.total_steps % self.log_interval == 0: self.log_step(results) - def select_action(self, obs, info=None, extra_info=False): - """Determine the action to take at the current timestep. + def select_action(self, obs, info=None): + '''Determine the action to take at the current timestep. Args: obs (ndarray): The observation at this timestep. @@ -209,68 +200,21 @@ def select_action(self, obs, info=None, extra_info=False): Returns: action (ndarray): The action chosen by the controller. 
- """ + ''' with torch.no_grad(): obs = torch.FloatTensor(obs).to(self.device) - action, v, logp = self.agent.ac.act(obs, True) - if extra_info: - return action, v, logp - return action + action = self.agent.ac.act(obs) - def train_step(self): - """Performs a training/fine-tuning step.""" - self.agent.train() - self.obs_normalizer.unset_read_only() - rollouts = PPOBuffer(self.env.observation_space, self.env.action_space, self.rollout_steps, self.rollout_batch_size) - obs = self.obs - start = time.time() - for _ in range(self.rollout_steps): - with torch.no_grad(): - act, v, logp = self.agent.ac.step(torch.FloatTensor(obs).to(self.device)) - next_obs, rew, done, info = self.env.step(act) - next_obs = self.obs_normalizer(next_obs) - rew = self.reward_normalizer(rew, done) - mask = 1 - done.astype(float) - # Time truncation is not the same as true termination. - terminal_v = np.zeros_like(v) - for idx, inf in enumerate(info['n']): - if 'terminal_info' not in inf: - continue - inff = inf['terminal_info'] - if 'TimeLimit.truncated' in inff and inff['TimeLimit.truncated']: - terminal_obs = inf['terminal_observation'] - terminal_obs_tensor = torch.FloatTensor(terminal_obs).unsqueeze(0).to(self.device) - terminal_val = self.agent.ac.critic(terminal_obs_tensor).squeeze().detach().cpu().numpy() - terminal_v[idx] = terminal_val - rollouts.push({'obs': obs, 'act': act, 'rew': rew, 'mask': mask, 'v': v, 'logp': logp, 'terminal_v': terminal_v}) - obs = next_obs - self.obs = obs - self.total_steps += self.rollout_batch_size * self.rollout_steps - # Learn from rollout batch. - last_val = self.agent.ac.critic(torch.FloatTensor(obs).to(self.device)).detach().cpu().numpy() - ret, adv = compute_returns_and_advantages(rollouts.rew, - rollouts.v, - rollouts.mask, - rollouts.terminal_v, - last_val, - gamma=self.gamma, - use_gae=self.use_gae, - gae_lambda=self.gae_lambda) - rollouts.ret = ret - # Prevent divide-by-0 for repetitive tasks. 
- rollouts.adv = (adv - adv.mean()) / (adv.std() + 1e-6) - results = self.agent.update(rollouts, self.device) - results.update({'step': self.total_steps, 'elapsed_time': time.time() - start}) - return results + return action def run(self, env=None, render=False, - n_episodes=1, + n_episodes=10, verbose=False, ): - """Runs evaluation with current policy.""" + '''Runs evaluation with current policy.''' self.agent.eval() self.obs_normalizer.set_read_only() if env is None: @@ -283,15 +227,30 @@ def run(self, env.add_tracker('constraint_values', 0, mode='queue') env.add_tracker('mse', 0, mode='queue') - obs, info = env.reset() + obs, info = self.env_reset(env, True) + true_obs = obs obs = self.obs_normalizer(obs) ep_returns, ep_lengths = [], [] frames = [] - mse, ep_rmse = [], [] + total_return = 0 + start = time.time() while len(ep_returns) < n_episodes: action = self.select_action(obs=obs, info=info) - obs, _, done, info = env.step(action) - mse.append(info['mse']) + + # Adding safety filter + success = False + physical_action = env.denormalize_action(action) + unextended_obs = np.squeeze(true_obs)[:env.symbolic.nx] + certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) + if success: + action = env.normalize_action(certified_action) + else: + self.safety_filter.ocp_solver.reset() + + action = np.atleast_2d(np.squeeze([action])) + obs, rew, done, info = env.step(action) + total_return += rew + if render: env.render() frames.append(env.render('rgb_array')) @@ -299,18 +258,20 @@ def run(self, print(f'obs {obs} | act {action}') if done: assert 'episode' in info - ep_rmse.append(np.array(mse).mean()**0.5) - mse = [] - ep_returns.append(info['episode']['r']) + ep_returns.append(total_return) ep_lengths.append(info['episode']['l']) - obs, _ = env.reset() + obs, info = self.env_reset(env, True) + total_return = 0 + true_obs = obs obs = self.obs_normalizer(obs) # Collect evaluation results. ep_lengths = np.asarray(ep_lengths) ep_returns = np.asarray(ep_returns) - eval_results = {'ep_returns': ep_returns, 'ep_lengths': ep_lengths, - 'rmse': np.array(ep_rmse).mean(), - 'rmse_std': np.array(ep_rmse).std()} + eval_results = { + 'ep_returns': ep_returns, + 'ep_lengths': ep_lengths, + 'elapsed_time': time.time() - start + } if len(frames) > 0: eval_results['frames'] = frames # Other episodic stats from evaluation env. 
@@ -319,20 +280,95 @@ def run(self, eval_results.update(queued_stats) return eval_results + def train_step(self): + '''Performs a training/fine-tuning step.''' + self.agent.train() + self.obs_normalizer.unset_read_only() + rollouts = PPOBuffer(self.env.observation_space, self.env.action_space, self.rollout_steps, self.rollout_batch_size) + obs = self.obs + true_obs = self.true_obs + info = self.info + start = time.time() + for _ in range(self.rollout_steps): + with torch.no_grad(): + action, v, logp = self.agent.ac.step(torch.FloatTensor(obs).to(self.device)) + unsafe_action = action + + # Adding safety filter + success = False + if self.safety_filter is not None and (self.filter_train_actions is True or self.penalize_sf_diff is True): + physical_action = self.env.envs[0].denormalize_action(action) + unextended_obs = np.squeeze(true_obs)[:self.env.envs[0].symbolic.nx] + certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) + if success and self.filter_train_actions is True: + action = self.env.envs[0].normalize_action(certified_action) + else: + self.safety_filter.ocp_solver.reset() + + action = np.atleast_2d(np.squeeze([action])) + next_obs, rew, done, info = self.env.step(action) + if done[0] and self.use_safe_reset: + next_obs, info = self.env_reset(self.env, self.use_safe_reset) + if self.penalize_sf_diff and success: + rew = np.log(rew) + rew -= self.sf_penalty * np.linalg.norm(physical_action - certified_action) + rew = np.exp(rew) + next_true_obs = next_obs + next_obs = self.obs_normalizer(next_obs) + rew = self.reward_normalizer(rew, done) + mask = 1 - done.astype(float) + # Time truncation is not the same as true termination. + terminal_v = np.zeros_like(v) + for idx, inf in enumerate(info['n']): + if 'terminal_info' not in inf: + continue + inff = inf['terminal_info'] + if 'TimeLimit.truncated' in inff and inff['TimeLimit.truncated']: + terminal_obs = inf['terminal_observation'] + terminal_obs_tensor = torch.FloatTensor(terminal_obs).unsqueeze(0).to(self.device) + terminal_val = self.agent.ac.critic(terminal_obs_tensor).squeeze().detach().cpu().numpy() + terminal_v[idx] = terminal_val + + rollouts.push({'obs': obs, 'act': unsafe_action, 'rew': rew, 'mask': mask, 'v': v, 'logp': logp, 'terminal_v': terminal_v}) + obs = next_obs + true_obs = next_true_obs + info = info['n'][0] + self.obs = obs + self.true_obs = true_obs + self.info = info + self.total_steps += self.rollout_batch_size * self.rollout_steps + # Learn from rollout batch. + last_val = self.agent.ac.critic(torch.FloatTensor(obs).to(self.device)).detach().cpu().numpy() + ret, adv = compute_returns_and_advantages(rollouts.rew, + rollouts.v, + rollouts.mask, + rollouts.terminal_v, + last_val, + gamma=self.gamma, + use_gae=self.use_gae, + gae_lambda=self.gae_lambda) + rollouts.ret = ret + # Prevent divide-by-0 for repetitive tasks. + rollouts.adv = (adv - adv.mean()) / (adv.std() + 1e-6) + results = self.agent.update(rollouts, self.device) + results.update({'step': self.total_steps, 'elapsed_time': time.time() - start}) + return results + def log_step(self, results ): - """Does logging after a training step.""" + '''Does logging after a training step.''' step = results['step'] # runner stats self.logger.add_scalars( { 'step': step, - 'step_time': results['elapsed_time'], - 'progress': step / self.max_env_steps + 'progress': step / self.max_env_steps, }, step, - prefix='time') + prefix='time', + write=False, + write_tb=False) # Learning stats. 
self.logger.add_scalars( { @@ -341,44 +377,68 @@ def log_step(self, }, step, prefix='loss') - - try: - # Performance stats. - ep_lengths = np.asarray(self.env.length_queue) - ep_returns = np.asarray(self.env.return_queue) - ep_constraint_violation = np.asarray(self.env.queued_stats['constraint_violation']) + + # Performance stats. + ep_lengths = np.asarray(self.env.length_queue) + ep_returns = np.asarray(self.env.return_queue) + ep_constraint_violation = np.asarray(self.env.queued_stats['constraint_violation']) + self.logger.add_scalars( + { + 'ep_length': ep_lengths.mean(), + 'ep_return': ep_returns.mean(), + 'ep_reward': (ep_returns / ep_lengths).mean(), + 'ep_constraint_violation': ep_constraint_violation.mean(), + 'step_time': results['elapsed_time'], + }, + step, + prefix='stat') + # Total constraint violation during learning. + total_violations = self.env.accumulated_stats['constraint_violation'] + self.logger.add_scalars({'constraint_violation': total_violations}, step, prefix='stat') + if 'eval' in results: + eval_ep_lengths = results['eval']['ep_lengths'] + eval_ep_returns = results['eval']['ep_returns'] + eval_constraint_violation = results['eval']['constraint_violation'] + eval_mse = results['eval']['mse'] self.logger.add_scalars( { - 'ep_length': ep_lengths.mean(), - 'ep_return': ep_returns.mean(), - 'ep_return_std': ep_returns.std(), - 'ep_reward': (ep_returns / ep_lengths).mean(), - 'ep_constraint_violation': ep_constraint_violation.mean() + 'ep_length': eval_ep_lengths.mean(), + 'ep_return': eval_ep_returns.mean(), + 'ep_reward': (eval_ep_returns / eval_ep_lengths).mean(), + 'constraint_violation': eval_constraint_violation.mean(), + 'mse': eval_mse.mean(), + 'step_time': results['eval']['elapsed_time'], }, step, - prefix='stat') - # Total constraint violation during learning. - total_violations = self.env.accumulated_stats['constraint_violation'] - self.logger.add_scalars({'constraint_violation': total_violations}, step, prefix='stat') - if 'eval' in results: - eval_ep_lengths = results['eval']['ep_lengths'] - eval_ep_returns = results['eval']['ep_returns'] - eval_constraint_violation = results['eval']['constraint_violation'] - eval_rmse = results['eval']['rmse'] - eval_rmse_std = results['eval']['rmse_std'] - self.logger.add_scalars( - { - 'ep_length': eval_ep_lengths.mean(), - 'ep_return': eval_ep_returns.mean(), - 'ep_return_std': eval_ep_returns.std(), - 'ep_reward': (eval_ep_returns / eval_ep_lengths).mean(), - 'constraint_violation': eval_constraint_violation.mean(), - 'rmse': eval_rmse, - 'rmse_std': eval_rmse_std - }, - step, - prefix='stat_eval') - except: - pass + prefix='stat_eval') # Print summary table self.logger.dump_scalars() + + def env_reset(self, env, use_safe_reset): + '''Resets the environment until a feasible initial state is found. + + Args: + env (BenchmarkEnv): The environment that is being reset. + use_safe_reset (bool): Whether to safely reset the system using the SF. + + Returns: + obs (ndarray): The initial observation. + info (dict): The initial info. 
+ ''' + success = False + action = self.model.U_EQ + obs, info = env.reset() + if self.safety_filter is not None: + self.safety_filter.reset_before_run() + + if use_safe_reset is True and self.safety_filter is not None: + while success is not True or np.any(self.safety_filter.slack_prev > 1e-4): + obs, info = env.reset() + info['current_step'] = 1 + unextended_obs = np.squeeze(obs)[:self.env.envs[0].symbolic.nx] + self.safety_filter.reset_before_run() + _, success = self.safety_filter.certify_action(unextended_obs, action, info) + if not success: + self.safety_filter.ocp_solver.reset() + + return obs, info diff --git a/safe_control_gym/envs/constraints.py b/safe_control_gym/envs/constraints.py index d7ff6fadd..3c98aaedd 100644 --- a/safe_control_gym/envs/constraints.py +++ b/safe_control_gym/envs/constraints.py @@ -106,7 +106,7 @@ def get_value(self, value (ndarray): The evaluation of the constraint. ''' env_value = self.get_env_constraint_var(env) - return np.round_(np.atleast_1d(np.squeeze(self.sym_func(np.array(env_value, ndmin=1)))), decimals=self.decimals) + return np.round(np.atleast_1d(np.squeeze(self.sym_func(np.array(env_value, ndmin=1)))), decimals=self.decimals) def is_violated(self, env, @@ -443,7 +443,7 @@ def __init__(self, self.num_constraints = self.bound.shape[0] def get_value(self, env): - c_value = np.round_(np.abs(self.constraint_filter @ env.state) - self.bound, decimals=self.decimals) + c_value = np.round(np.abs(self.constraint_filter @ env.state) - self.bound, decimals=self.decimals) return c_value # TODO: temp addition diff --git a/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py b/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py index 5a0442a90..11e5ade6d 100644 --- a/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py +++ b/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py @@ -14,11 +14,10 @@ from datetime import datetime from enum import Enum +import casadi as cs import numpy as np import pybullet as p import pybullet_data -import casadi as cs -from termcolor import colored from safe_control_gym.envs.benchmark_env import BenchmarkEnv from safe_control_gym.math_and_models.transformations import csRotXYZ, get_angularvelocity_rpy @@ -95,26 +94,26 @@ def __init__(self, self.RECORD = record # Load the drone properties from the .urdf file. self.MASS, \ - self.L, \ - self.THRUST2WEIGHT_RATIO, \ - self.J, \ - self.J_INV, \ - self.KF, \ - self.KM, \ - self.COLLISION_H, \ - self.COLLISION_R, \ - self.COLLISION_Z_OFFSET, \ - self.MAX_SPEED_KMH, \ - self.GND_EFF_COEFF, \ - self.PROP_RADIUS, \ - self.DRAG_COEFF, \ - self.DW_COEFF_1, \ - self.DW_COEFF_2, \ - self.DW_COEFF_3, \ - self.PWM2RPM_SCALE, \ - self.PWM2RPM_CONST, \ - self.MIN_PWM, \ - self.MAX_PWM = self._parse_urdf_parameters(self.URDF_PATH) + self.L, \ + self.THRUST2WEIGHT_RATIO, \ + self.J, \ + self.J_INV, \ + self.KF, \ + self.KM, \ + self.COLLISION_H, \ + self.COLLISION_R, \ + self.COLLISION_Z_OFFSET, \ + self.MAX_SPEED_KMH, \ + self.GND_EFF_COEFF, \ + self.PROP_RADIUS, \ + self.DRAG_COEFF, \ + self.DW_COEFF_1, \ + self.DW_COEFF_2, \ + self.DW_COEFF_3, \ + self.PWM2RPM_SCALE, \ + self.PWM2RPM_CONST, \ + self.MIN_PWM, \ + self.MAX_PWM = self._parse_urdf_parameters(self.URDF_PATH) self.GROUND_PLANE_Z = -0.05 if verbose: print( @@ -273,8 +272,6 @@ def _advance_simulation(self, clipped_action, disturbance_force=None): `_preprocess_action()` in each subclass. disturbance_force (ndarray, optional): Disturbance force, applied to all drones. 
''' - time_before_stepping = time.time() - # clipped_action = np.reshape(clipped_action, (self.NUM_DRONES, 4)) clipped_action = np.expand_dims(clipped_action, axis=0) # Repeat for as many as the aggregate physics steps. @@ -283,7 +280,7 @@ def _advance_simulation(self, clipped_action, disturbance_force=None): # Between aggregate steps for certain types of update. if self.PYB_STEPS_PER_CTRL > 1 and self.PHYSICS in [ Physics.DYN, Physics.PYB_GND, Physics.PYB_DRAG, - Physics.PYB_DW, Physics.PYB_GND_DRAG_DW, Physics.RK4 #, Physics.DYN_2D + Physics.PYB_DW, Physics.PYB_GND_DRAG_DW, Physics.RK4 # , Physics.DYN_2D ]: self._update_and_store_kinematic_information() # Step the simulation using the desired physics update. @@ -321,9 +318,9 @@ def _advance_simulation(self, clipped_action, disturbance_force=None): if disturbance_force is not None: pos = self._get_drone_state_vector(i)[:3] ''' - NOTE: applyExternalForce only works when explicitly + NOTE: applyExternalForce only works when explicitly stepping the simulation with p.stepSimulation(). - Therefore, + Therefore, ''' p.applyExternalForce( self.DRONE_IDS[i], @@ -333,12 +330,12 @@ def _advance_simulation(self, clipped_action, disturbance_force=None): flags=p.WORLD_FRAME, physicsClientId=self.PYB_CLIENT) # PyBullet computes the new state, unless Physics.DYN. - if self.PHYSICS not in [Physics.DYN, Physics.RK4, Physics.DYN_2D, Physics.DYN_SI, + if self.PHYSICS not in [Physics.DYN, Physics.RK4, Physics.DYN_2D, Physics.DYN_SI, Physics.DYN_SI_3D, Physics.DYN_SI_3D_10]: p.stepSimulation(physicsClientId=self.PYB_CLIENT) # Save the last applied action (e.g. to compute drag). self.last_clipped_action = clipped_action - if self.PHYSICS in [Physics.DYN_2D, Physics.DYN_SI, + if self.PHYSICS in [Physics.DYN_2D, Physics.DYN_SI, Physics.DYN_SI_3D, Physics.DYN_SI_3D_10]: # set the state of the drone after stepping with the analytical model self._set_pybullet_information() @@ -491,7 +488,7 @@ def _ground_effect(self, rpm, nth_drone): ]) prop_heights = np.clip(prop_heights, self.GND_EFF_H_CLIP, np.inf) gnd_effects = np.array(rpm ** 2) * self.KF * self.GND_EFF_COEFF \ - * (self.PROP_RADIUS / (4 * prop_heights)) ** 2 + * (self.PROP_RADIUS / (4 * prop_heights)) ** 2 if np.abs(self.rpy[nth_drone, 0]) < np.pi / 2 and np.abs( self.rpy[nth_drone, 1]) < np.pi / 2: for i in range(4): @@ -649,7 +646,6 @@ def _dynamics_rk4(self, rpm, nth_drone): self.rpy_rates[nth_drone, :] = rpy_rates def setup_rk4_dynamics_expression(self): - nx, nu = 12, 4 gamma = self.KM / self.KF z = cs.MX.sym('z') z_dot = cs.MX.sym('z_dot') @@ -666,7 +662,7 @@ def setup_rk4_dynamics_expression(self): # PyBullet Euler angles use the SDFormat for rotation matrices. Rob = csRotXYZ(phi, theta, psi) # rotation matrix transforming a vector in the body frame to the world frame. - # Define state variables. + # Define state variables. X = cs.vertcat(x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p_body, q_body, r_body) # Define inputs. 
@@ -689,14 +685,14 @@ def setup_rk4_dynamics_expression(self): self.L / cs.sqrt(2.0) * (-f1 + f2 + f3 - f4), gamma * (-f1 + f2 - f3 + f4)) rate_dot = self.J_INV @ ( - Mb - (cs.skew(cs.vertcat(p_body, q_body, r_body)) @ self.J @ cs.vertcat(p_body, q_body, r_body))) + Mb - (cs.skew(cs.vertcat(p_body, q_body, r_body)) @ self.J @ cs.vertcat(p_body, q_body, r_body))) ang_dot = cs.blockcat([[1, cs.sin(phi) * cs.tan(theta), cs.cos(phi) * cs.tan(theta)], [0, cs.cos(phi), -cs.sin(phi)], [0, cs.sin(phi) / cs.cos(theta), cs.cos(phi) / cs.cos(theta)]]) @ cs.vertcat(p_body, q_body, r_body) X_dot = cs.vertcat(pos_dot[0], pos_ddot[0], pos_dot[1], pos_ddot[1], pos_dot[2], pos_ddot[2], ang_dot, rate_dot) - self.X_dot_fun = cs.Function("X_dot", [X, U], [X_dot]) + self.X_dot_fun = cs.Function('X_dot', [X, U], [X_dot]) self.fd_func = cs.integrator('fd', 'rk', {'x': X, 'p': U, 'ode': X_dot}, {'tf': self.PYB_TIMESTEP}) @@ -715,7 +711,6 @@ def _dynamics_2d(self, rpm, nth_drone): rpy = self.rpy[nth_drone, :] vel = self.vel[nth_drone, :] ang_v = self.ang_v[nth_drone, :] - rpy_rates = self.rpy_rates[nth_drone, :] # rotation = np.array(p.getMatrixFromQuaternion(quat)).reshape(3, 3) # Compute forces and torques. @@ -728,7 +723,7 @@ def _dynamics_2d(self, rpm, nth_drone): # update state with RK4 # next_state = self.fd_func(x0=state, p=input)['xf'].full()[:, 0] X_dot = self.X_dot_fun(state, action).full()[:, 0] - next_state = state + X_dot*self.PYB_TIMESTEP + next_state = state + X_dot * self.PYB_TIMESTEP # Updated information pos = np.array([next_state[0], 0, next_state[4]]) @@ -794,13 +789,13 @@ def setup_dynamics_2d_expression(self): self.L / cs.sqrt(2.0) * (-f1 + f2 + f3 - f4), gamma * (-f1 + f2 - f3 + f4)) rate_dot = self.J_INV @ ( - Mb - (cs.skew(cs.vertcat(p_body, q_body, r_body)) @ self.J @ cs.vertcat(p_body, q_body, r_body))) + Mb - (cs.skew(cs.vertcat(p_body, q_body, r_body)) @ self.J @ cs.vertcat(p_body, q_body, r_body))) ang_dot = (cs.blockcat([[1, cs.sin(phi) * cs.tan(theta), cs.cos(phi) * cs.tan(theta)], [0, cs.cos(phi), -cs.sin(phi)], [0, cs.sin(phi) / cs.cos(theta), cs.cos(phi) / cs.cos(theta)]]) @ cs.vertcat(p_body, q_body, r_body)) X_dot = cs.vertcat(pos_dot[0], pos_ddot[0], pos_dot[1], pos_ddot[1], pos_dot[2], pos_ddot[2], ang_dot, rate_dot) - self.X_dot_fun = cs.Function("X_dot", [X, U], [X_dot]) + self.X_dot_fun = cs.Function('X_dot', [X, U], [X_dot]) def _dynamics_si(self, action, nth_drone, disturbance_force=None): '''Explicit dynamics implementation from the identified model. @@ -817,7 +812,6 @@ def _dynamics_si(self, action, nth_drone, disturbance_force=None): # quat = self.quat[nth_drone, :] rpy = self.rpy[nth_drone, :] vel = self.vel[nth_drone, :] - ang_v = self.ang_v[nth_drone, :] rpy_rates = self.rpy_rates[nth_drone, :] # Compute forces and torques. 
@@ -827,7 +821,7 @@ def _dynamics_si(self, action, nth_drone, disturbance_force=None): # update state if disturbance_force is not None: d = np.array([disturbance_force[0], disturbance_force[2]]) - else: + else: d = np.array([0, 0]) # perform euler integration # next_state = state + self.PYB_TIMESTEP * self.X_dot_fun(state, action, d).full()[:, 0] @@ -836,7 +830,7 @@ def _dynamics_si(self, action, nth_drone, disturbance_force=None): k2 = self.X_dot_fun(state + 0.5 * self.PYB_TIMESTEP * k1, action, d).full()[:, 0] k3 = self.X_dot_fun(state + 0.5 * self.PYB_TIMESTEP * k2, action, d).full()[:, 0] k4 = self.X_dot_fun(state + self.PYB_TIMESTEP * k3, action, d).full()[:, 0] - next_state = state + (self.PYB_TIMESTEP / 6) * (k1 + 2*k2 + 2*k3 + k4) + next_state = state + (self.PYB_TIMESTEP / 6) * (k1 + 2 * k2 + 2 * k3 + k4) # Updated information pos = np.array([next_state[0], 0, next_state[2]]) @@ -860,19 +854,19 @@ def setup_dynamics_si_expression(self): theta_dot = cs.MX.sym('theta_dot') # Pitch X = cs.vertcat(x, x_dot, z, z_dot, theta, theta_dot) g = self.GRAVITY_ACC - d = cs.MX.sym('d', 2, 1) # disturbance force + d = cs.MX.sym('d', 2, 1) # disturbance force # Define inputs. - T = cs.MX.sym('T') # normlized thrust [N] + T = cs.MX.sym('T') # normlized thrust [N] P = cs.MX.sym('P') # desired pitch angle [rad] U = cs.vertcat(T, P) X_dot = cs.vertcat(x_dot, - (18.112984649321753 * T+ 3.6800) * cs.sin(theta) + -0.008 + d[0] / self.MASS, - z_dot, - (18.112984649321753 * T + 3.6800) * cs.cos(theta) - g + d[1] / self.MASS, - theta_dot, - -140.8 * theta - 13.4 * theta_dot + 124.8 * P) - self.X_dot_fun = cs.Function("X_dot", [X, U, d], [X_dot]) + (18.112984649321753 * T + 3.6800) * cs.sin(theta) + -0.008 + d[0] / self.MASS, + z_dot, + (18.112984649321753 * T + 3.6800) * cs.cos(theta) - g + d[1] / self.MASS, + theta_dot, + -140.8 * theta - 13.4 * theta_dot + 124.8 * P) + self.X_dot_fun = cs.Function('X_dot', [X, U, d], [X_dot]) def _dynamics_si_3d(self, action, nth_drone, disturbance_force=None): '''Explicit dynamics implementation from the identified model. @@ -889,7 +883,6 @@ def _dynamics_si_3d(self, action, nth_drone, disturbance_force=None): rpy = self.rpy[nth_drone, :] vel = self.vel[nth_drone, :] ang_v = self.ang_v[nth_drone, :] - rpy_rates = self.rpy_rates[nth_drone, :] # Compute forces and torques. # Update state with discrete time dynamics. 
@@ -973,21 +966,21 @@ def setup_dynamics_si_3d_expression(self): params_pitch_rate = [-99.94, -13.3, 84.73] params_yaw_rate = [0, 0, 0] X_dot = cs.vertcat(x_dot, - (params_acc[0] * T + params_acc[1]) * (cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)) + d[0] / self.MASS, - y_dot, - (params_acc[0] * T + params_acc[1]) * ( - cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), - z_dot, - (params_acc[0] * T + params_acc[1]) * cs.cos(phi) * cs.cos(theta) - g + d[1] / self.MASS, - phi_dot, - theta_dot, - psi_dot, - params_roll_rate[0] * phi + params_roll_rate[1] * phi_dot + params_roll_rate[2] * R, - params_pitch_rate[0] * theta + params_pitch_rate[1] * theta_dot + params_pitch_rate[2] * P, - params_yaw_rate[0] * psi + params_yaw_rate[1] * psi_dot + params_yaw_rate[2] * Y) - - self.X_dot_fun = cs.Function("X_dot", [X, U, d], [X_dot]) - + (params_acc[0] * T + params_acc[1]) * (cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)) + d[0] / self.MASS, + y_dot, + (params_acc[0] * T + params_acc[1]) * ( + cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), + z_dot, + (params_acc[0] * T + params_acc[1]) * cs.cos(phi) * cs.cos(theta) - g + d[1] / self.MASS, + phi_dot, + theta_dot, + psi_dot, + params_roll_rate[0] * phi + params_roll_rate[1] * phi_dot + params_roll_rate[2] * R, + params_pitch_rate[0] * theta + params_pitch_rate[1] * theta_dot + params_pitch_rate[2] * P, + params_yaw_rate[0] * psi + params_yaw_rate[1] * psi_dot + params_yaw_rate[2] * Y) + + self.X_dot_fun = cs.Function('X_dot', [X, U, d], [X_dot]) + def _dynamics_si_3d_10(self, action, nth_drone, disturbance_force=None): '''Explicit dynamics implementation from the identified model. NOTE: The dynamics update is independent of the pybullet simulation. @@ -1003,7 +996,6 @@ def _dynamics_si_3d_10(self, action, nth_drone, disturbance_force=None): rpy = self.rpy[nth_drone, :] vel = self.vel[nth_drone, :] ang_v = self.ang_v[nth_drone, :] - rpy_rates = self.rpy_rates[nth_drone, :] # Compute forces and torques. # Update state with discrete time dynamics. 
@@ -1036,7 +1028,7 @@ def _dynamics_si_3d_10(self, action, nth_drone, disturbance_force=None): self.ang_v[nth_drone, :] = ang_v.copy() def setup_dynamics_si_3d_10_expression(self): - # Casadi states + # Casadi states x = cs.MX.sym('x') y = cs.MX.sym('y') z = cs.MX.sym('z') @@ -1083,19 +1075,19 @@ def setup_dynamics_si_3d_10_expression(self): psi = 0 X_dot = cs.vertcat(x_dot, - (params_acc[0] * T + params_acc[1]) * (cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)) + d[0] / self.MASS, - y_dot, - (params_acc[0] * T + params_acc[1]) * ( - cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), - z_dot, - (params_acc[0] * T + params_acc[1]) * cs.cos(phi) * cs.cos(theta) - g + d[1] / self.MASS, - phi_dot, - theta_dot, - params_roll_rate[0] * phi + params_roll_rate[1] * phi_dot + params_roll_rate[2] * R, - params_pitch_rate[0] * theta + params_pitch_rate[1] * theta_dot + params_pitch_rate[2] * P, + (params_acc[0] * T + params_acc[1]) * (cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)) + d[0] / self.MASS, + y_dot, + (params_acc[0] * T + params_acc[1]) * ( + cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), + z_dot, + (params_acc[0] * T + params_acc[1]) * cs.cos(phi) * cs.cos(theta) - g + d[1] / self.MASS, + phi_dot, + theta_dot, + params_roll_rate[0] * phi + params_roll_rate[1] * phi_dot + params_roll_rate[2] * R, + params_pitch_rate[0] * theta + params_pitch_rate[1] * theta_dot + params_pitch_rate[2] * P, ) - self.X_dot_fun = cs.Function("X_dot", [X, U, d], [X_dot]) + self.X_dot_fun = cs.Function('X_dot', [X, U, d], [X_dot]) def _show_drone_local_axes(self, nth_drone): '''Draws the local frame of the n-th drone in PyBullet's GUI. @@ -1167,5 +1159,5 @@ def _parse_urdf_parameters(self, file_name): MIN_PWM = float(URDF_TREE[0].attrib['pwm_min']) MAX_PWM = float(URDF_TREE[0].attrib['pwm_max']) return M, L, THRUST2WEIGHT_RATIO, J, J_INV, KF, KM, COLLISION_H, COLLISION_R, COLLISION_Z_OFFSET, MAX_SPEED_KMH, \ - GND_EFF_COEFF, PROP_RADIUS, DRAG_COEFF, DW_COEFF_1, DW_COEFF_2, DW_COEFF_3, \ - PWM2RPM_SCALE, PWM2RPM_CONST, MIN_PWM, MAX_PWM \ No newline at end of file + GND_EFF_COEFF, PROP_RADIUS, DRAG_COEFF, DW_COEFF_1, DW_COEFF_2, DW_COEFF_3, \ + PWM2RPM_SCALE, PWM2RPM_CONST, MIN_PWM, MAX_PWM diff --git a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py index 7e4f337cc..6baa35db7 100644 --- a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py +++ b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py @@ -235,7 +235,7 @@ def __init__(self, (self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S and len(info_mse_metric_state_weight) == 5) or \ (self.QUAD_TYPE == QuadType.THREE_D_ATTITUDE and len(info_mse_metric_state_weight) == 12) or \ (self.QUAD_TYPE == QuadType.THREE_D_ATTITUDE_10 and len(info_mse_metric_state_weight) == 10): - + self.info_mse_metric_state_weight = np.array(info_mse_metric_state_weight, ndmin=1, dtype=float) else: raise ValueError('[ERROR] in Quadrotor.__init__(), wrong info_mse_metric_state_weight argument size.') @@ -256,7 +256,7 @@ def __init__(self, QuadType.THREE_D_ATTITUDE: ['init_x', 'init_x_dot', 'init_y', 'init_y_dot', 'init_z', 'init_z_dot', 'init_phi', 'init_theta', 'init_psi', 'init_p', 'init_q', 'init_r'], QuadType.THREE_D_ATTITUDE_10: ['init_x', 'init_x_dot', 'init_y', 'init_y_dot', 'init_z', 'init_z_dot', - 'init_phi', 'init_theta', 'init_p', 'init_q'], + 'init_phi', 'init_theta', 'init_p', 'init_q'], } if init_state is None: for 
init_name in self.INIT_STATE_RAND_INFO: # Default zero state. @@ -283,8 +283,8 @@ def __init__(self, self.INERTIAL_PROP_RAND_INFO.pop('Izz', None) elif self.QUAD_TYPE == QuadType.TWO_D or \ self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or \ - self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S \ - or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_BODY: + self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S or \ + self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_BODY: # Only randomize Iyy for the 2D quadrotor. self.INERTIAL_PROP_RAND_INFO.pop('Ixx', None) self.INERTIAL_PROP_RAND_INFO.pop('Izz', None) @@ -691,7 +691,7 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): # Define observation. Y = cs.vertcat(x, x_dot, z, z_dot, theta, theta_dot) elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE: - # identified parameters for the 2D attitude interface + # identified parameters for the 2D attitude interface # NOTE: these parameters are not set in the prior_prop dict # since they are specific to the 2D attitude model self.beta_1 = prior_prop.get('beta_1', 18.112984649321753) @@ -739,7 +739,7 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): P_mapping = self.alpha_1 * (theta + self.pitch_bias) + self.alpha_2 * theta_dot + self.alpha_3 * P self.T_mapping_func = cs.Function('T_mapping', [T], [T_mapping]) self.P_mapping_func = cs.Function('P_mapping', [theta, theta_dot, P], [P_mapping]) - + elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_BODY: nx, nu = 6, 2 # Define states. @@ -758,18 +758,18 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): # With the formulat F_desired = b_F * T + a_F # Define dynamics equations. - X_dot = cs.vertcat(vx *cs.cos(theta) - vz * cs.sin(theta), + X_dot = cs.vertcat(vx * cs.cos(theta) - vz * cs.sin(theta), vz * theta_dot - g * cs.sin(theta), vx * cs.sin(theta) + vz * cs.cos(theta), - -vx * theta_dot - g * cs.cos(theta) + (beta_1 * T + beta_2), + -vx * theta_dot - g * cs.cos(theta) + (self.beta_1 * T + self.beta_2), -theta_dot, - alpha_1 * (-theta + pitch_bias) + alpha_2 * -theta_dot + alpha_3 * P) + self.alpha_1 * (-theta + self.pitch_bias) + self.alpha_2 * -theta_dot + self.alpha_3 * P) # Define observation. x_dot = vx * cs.cos(theta) + vz * cs.sin(theta) z_dot = -vx * cs.sin(theta) + vz * cs.cos(theta) # Y = cs.vertcat(x, x_dot, z, z_dot, theta, theta_dot) Y = cs.vertcat(x, vx, z, vz, theta, theta_dot) - T_mapping = beta_1 * T + beta_2 + T_mapping = self.beta_1 * T + self.beta_2 self.T_mapping_func = cs.Function('T_mapping', [T], [T_mapping]) elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S: @@ -878,10 +878,10 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): # TODO: create a parameter for the new quad model X_dot = cs.vertcat(x_dot, (params_acc[0] * T + params_acc[1]) * ( - cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)), + cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)), y_dot, (params_acc[0] * T + params_acc[1]) * ( - cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), + cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), z_dot, (params_acc[0] * T + params_acc[1]) * cs.cos(phi) * cs.cos(theta) - g, phi_dot, @@ -892,7 +892,7 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): params_yaw_rate[0] * psi + params_yaw_rate[1] * psi_dot + params_yaw_rate[2] * Y) # Define observation. Y = cs.vertcat(x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, phi_dot, theta_dot, psi_dot) - + elif self.QUAD_TYPE == QuadType.THREE_D_ATTITUDE_10: nx, nu = 10, 3 # Define states. 
@@ -921,17 +921,17 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): # TODO: create a parameter for the new quad model X_dot = cs.vertcat(x_dot, (params_acc[0] * T + params_acc[1]) * ( - cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)), + cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)), y_dot, (params_acc[0] * T + params_acc[1]) * ( - cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), + cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), z_dot, (params_acc[0] * T + params_acc[1]) * cs.cos(phi) * cs.cos(theta) - g, phi_dot, theta_dot, params_roll_rate[0] * phi + params_roll_rate[1] * phi_dot + params_roll_rate[2] * R, params_pitch_rate[0] * theta + params_pitch_rate[1] * theta_dot + params_pitch_rate[2] * P, - ) + ) # Define observation. Y = cs.vertcat(x, x_dot, y, y_dot, z, z_dot, phi, theta, phi_dot, theta_dot) @@ -1068,8 +1068,8 @@ def _set_action_space(self): else: self.hover_thrust = self.GRAVITY_ACC * self.MASS / action_dim - self.action_scale = (self.physical_action_bounds[1]-self.physical_action_bounds[0])/2 - self.action_bias = (self.physical_action_bounds[1]+self.physical_action_bounds[0])/2 + self.action_scale = (self.physical_action_bounds[1] - self.physical_action_bounds[0]) / 2 + self.action_bias = (self.physical_action_bounds[1] + self.physical_action_bounds[0]) / 2 self.action_space = spaces.Box(low=-np.ones(action_dim), high=np.ones(action_dim), dtype=np.float32) @@ -1166,10 +1166,10 @@ def _set_observation_space(self): self.phi_dot_threshold_radians, self.theta_dot_threshold_radians ]) self.STATE_LABELS = ['x', 'x_dot', 'y', 'y_dot', 'z', 'z_dot', - 'phi', 'theta', 'phi_dot', 'theta_dot'] + 'phi', 'theta', 'phi_dot', 'theta_dot'] self.STATE_UNITS = ['m', 'm/s', 'm', 'm/s', 'm', 'm/s', 'rad', 'rad', 'rad/s', 'rad/s'] - + # Define the state space for the dynamics. self.state_space = spaces.Box(low=low, high=high, dtype=np.float32) @@ -1267,7 +1267,7 @@ def normalize_action(self, action): # else: # action = (action / self.hover_thrust - 1) / self.norm_act_scale - action = (action - self.action_bias)/self.action_scale + action = (action - self.action_bias) / self.action_scale return action @@ -1297,7 +1297,7 @@ def denormalize_action(self, action): # else: # action = (1 + self.norm_act_scale * action) * self.hover_thrust - action = action*self.action_scale + self.action_bias + action = action * self.action_scale + self.action_bias # action = np.clip(action, self.action_space.low, self.action_space.high) return action @@ -1498,9 +1498,9 @@ def _get_info(self): # Filter only relevant dimensions. 
state_error = state_error * self.info_mse_metric_state_weight info['mse'] = np.sum(state_error ** 2) - # if self.constraints is not None: - # info['constraint_values'] = self.constraints.get_values(self) - # info['constraint_violations'] = self.constraints.get_violations(self) + if self.constraints is not None: + info['constraint_values'] = self.constraints.get_values(self) + info['constraint_violations'] = self.constraints.get_violations(self) return info def _get_reset_info(self): @@ -1515,4 +1515,4 @@ def _get_reset_info(self): }, 'x_reference': self.X_GOAL, 'u_reference': self.U_GOAL} if self.constraints is not None: info['symbolic_constraints'] = self.constraints.get_all_symbolic_models() - return info \ No newline at end of file + return info diff --git a/safe_control_gym/safety_filters/__init__.py b/safe_control_gym/safety_filters/__init__.py index 2d53e550f..41527f16c 100644 --- a/safe_control_gym/safety_filters/__init__.py +++ b/safe_control_gym/safety_filters/__init__.py @@ -6,6 +6,10 @@ entry_point='safe_control_gym.safety_filters.mpsc.linear_mpsc:LINEAR_MPSC', config_entry_point='safe_control_gym.safety_filters.mpsc:mpsc.yaml') +register(idx='mpsc_acados', + entry_point='safe_control_gym.safety_filters.mpsc.mpsc_acados:MPSC_ACADOS', + config_entry_point='safe_control_gym.safety_filters.mpsc:mpsc.yaml') + register(idx='cbf', entry_point='safe_control_gym.safety_filters.cbf.cbf:CBF', config_entry_point='safe_control_gym.safety_filters.cbf:cbf.yaml') diff --git a/safe_control_gym/safety_filters/mpsc/mpsc.py b/safe_control_gym/safety_filters/mpsc/mpsc.py index 34307fe8e..251c58985 100644 --- a/safe_control_gym/safety_filters/mpsc/mpsc.py +++ b/safe_control_gym/safety_filters/mpsc/mpsc.py @@ -17,6 +17,7 @@ from safe_control_gym.controllers.mpc.mpc_utils import get_cost_weight_matrix, reset_constraints from safe_control_gym.safety_filters.base_safety_filter import BaseSafetyFilter from safe_control_gym.safety_filters.mpsc.mpsc_cost_function.one_step_cost import ONE_STEP_COST +from safe_control_gym.safety_filters.mpsc.mpsc_cost_function.precomputed_cost import PRECOMPUTED_COST from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function, get_trajectory_on_horizon @@ -26,13 +27,16 @@ class MPSC(BaseSafetyFilter, ABC): def __init__(self, env_func, horizon: int = 10, - q_lin: list = None, - r_lin: list = None, + q_mpc: list = None, + r_mpc: list = None, integration_algo: str = 'rk4', warmstart: bool = True, additional_constraints: list = None, use_terminal_set: bool = True, cost_function: Cost_Function = Cost_Function.ONE_STEP_COST, + mpsc_cost_horizon: int = 5, + decay_factor: float = 0.85, + use_acados: bool = False, **kwargs ): '''Initialize the MPSC. @@ -40,13 +44,15 @@ def __init__(self, Args: env_func (partial BenchmarkEnv): Environment for the task. horizon (int): The MPC horizon. - q_lin, r_lin (list): Q and R gain matrices for linear controller. + q_mpc, r_mpc (list): Q and R gain matrices for linear controller. integration_algo (str): The algorithm used for integrating the dynamics, either 'LTI', 'rk4', 'rk', or 'cvodes'. warmstart (bool): If the previous MPC soln should be used to warmstart the next mpc step. additional_constraints (list): List of additional constraints to consider. use_terminal_set (bool): Whether to use a terminal set constraint or not. cost_function (Cost_Function): A string (from Cost_Function) representing the cost function to be used. + mpsc_cost_horizon (int): How many steps forward to check for constraint violations. 
+ decay_factor (float): How much to discount future costs. ''' # Store all params/args. @@ -62,15 +68,14 @@ def __init__(self, self.env = env_func(normalized_rl_action_space=False) self.training_env = env_func(randomized_init=True, init_state=None, - cost='quadratic', normalized_rl_action_space=False, ) # Setup attributes. self.reset() self.dt = self.model.dt - self.Q = get_cost_weight_matrix(q_lin, self.model.nx) - self.R = get_cost_weight_matrix(r_lin, self.model.nu) + self.Q = get_cost_weight_matrix(q_mpc, self.model.nx) + self.R = get_cost_weight_matrix(r_mpc, self.model.nu) self.X_EQ = np.zeros(self.model.nx) self.U_EQ = self.model.U_EQ @@ -79,6 +84,7 @@ def __init__(self, self.lqr_gain = -compute_lqr_gain(self.model, self.X_EQ, self.U_EQ, self.Q, self.R, discrete_dynamics=True) self.terminal_set = None + self.prev_action = self.U_EQ if self.additional_constraints is None: additional_constraints = [] @@ -87,6 +93,10 @@ def __init__(self, if cost_function == Cost_Function.ONE_STEP_COST: self.cost_function = ONE_STEP_COST() + self.mpsc_cost_horizon = 1 + self.cost_function.mpsc_cost_horizon = 1 + elif cost_function == Cost_Function.PRECOMPUTED_COST: + self.cost_function = PRECOMPUTED_COST(self.env, mpsc_cost_horizon, decay_factor, self.output_dir) else: raise NotImplementedError(f'The MPSC cost function {cost_function} has not been implemented') @@ -95,9 +105,21 @@ def set_dynamics(self): '''Compute the dynamics.''' raise NotImplementedError - @abstractmethod def setup_optimizer(self): '''Setup the certifying MPC problem.''' + if self.use_acados: + self.setup_acados_optimizer() + else: + self.setup_casadi_optimizer() + + @abstractmethod + def setup_casadi_optimizer(self): + '''Setup the certifying MPC problem using CasADi.''' + raise NotImplementedError + + @abstractmethod + def setup_acados_optimizer(self): + '''Setup the certifying MPC problem using ACADOS.''' raise NotImplementedError def before_optimization(self, obs): @@ -125,6 +147,28 @@ def solve_optimization(self, feasible (bool): Whether the safety filtering was feasible or not. ''' + if self.use_acados: + action, feasible = self.solve_acados_optimization(obs, uncertified_action, iteration) + else: + action, feasible = self.solve_casadi_optimization(obs, uncertified_action, iteration) + return action, feasible + + def solve_casadi_optimization(self, + obs, + uncertified_action, + iteration=None, + ): + '''Solve the MPC optimization problem for a given observation and uncertified input. + + Args: + obs (ndarray): Current state/observation. + uncertified_action (ndarray): The uncertified_controller's action. + iteration (int): The current iteration, used for trajectory tracking. + + Returns: + action (ndarray): The certified action. + feasible (bool): Whether the safety filtering was feasible or not. + ''' opti_dict = self.opti_dict opti = opti_dict['opti'] z_var = opti_dict['z_var'] @@ -153,6 +197,8 @@ def solve_optimization(self, # Solve the optimization problem. try: sol = opti.solve() + self.cost_prev = sol.value(opti_dict['cost']) + self.slack_prev = sol.value(opti_dict['slack']) x_val, u_val, next_u_val = sol.value(z_var), sol.value(v_var), sol.value(next_u) self.z_prev = x_val self.v_prev = u_val.reshape((self.model.nu), self.horizon) @@ -168,6 +214,56 @@ def solve_optimization(self, action = None return action, feasible + def solve_acados_optimization(self, + obs, + uncertified_action, + iteration=None, + ): + '''Solve the MPC optimization problem for a given observation and uncertified input. 
+ + Args: + obs (ndarray): Current state/observation. + uncertified_action (ndarray): The uncertified_controller's action. + iteration (int): The current iteration, used for trajectory tracking. + + Returns: + action (ndarray): The certified action. + feasible (bool): Whether the safety filtering was feasible or not. + ''' + + ocp_solver = self.ocp_solver + ocp_solver.cost_set(0, 'yref', np.concatenate((np.zeros((self.model.nx)), np.atleast_1d(np.squeeze(uncertified_action))))) + + if isinstance(self.cost_function, PRECOMPUTED_COST): + uncert_input_traj = self.cost_function.calculate_unsafe_path(obs, uncertified_action, iteration) + + for stage in range(1, self.mpsc_cost_horizon): + ocp_solver.cost_set(stage, 'yref', np.concatenate((np.zeros((self.model.nx)), uncert_input_traj[:, stage]))) + + # Solve the optimization problem. + try: + action = ocp_solver.solve_for_x0(x0_bar=obs) + self.cost_prev = ocp_solver.get_cost() + self.slack_prev = np.zeros((self.horizon, self.model.nx + self.model.nu)) + x_val = np.zeros((self.horizon + 1, self.model.nx)) + u_val = np.zeros((self.horizon, self.model.nu)) + for i in range(self.horizon): + # self.slack_prev[i, :] = ocp_solver.get(i, 'su') + x_val[i, :] = ocp_solver.get(i, 'x') + u_val[i, :] = ocp_solver.get(i, 'u') + x_val[self.horizon, :] = ocp_solver.get(self.horizon, 'x') + self.z_prev = x_val.T + self.v_prev = u_val.T + # Take the first one from solved action sequence. + self.prev_action = action + feasible = True + except Exception as e: + print('Error Return Status:', ocp_solver.status) + print(e) + feasible = False + action = None + return action, feasible + def certify_action(self, current_state, uncertified_action, @@ -254,5 +350,6 @@ def reset_before_run(self, env=None): ''' self.z_prev = None self.v_prev = None + self.slack_prev = 0 self.kinf = self.horizon - 1 self.setup_results_dict() diff --git a/safe_control_gym/safety_filters/mpsc/mpsc.yaml b/safe_control_gym/safety_filters/mpsc/mpsc.yaml index 9d2798d0c..9640712bd 100644 --- a/safe_control_gym/safety_filters/mpsc/mpsc.yaml +++ b/safe_control_gym/safety_filters/mpsc/mpsc.yaml @@ -1,7 +1,7 @@ # LQR controller parameters -r_lin: +r_mpc: - 1. -q_lin: +q_mpc: - 1. 
# MPC Parameters @@ -10,19 +10,7 @@ warmstart: False integration_algo: rk4 use_terminal_set: False -# Prior info -prior_info: - prior_prop: null - randomize_prior_prop: False - prior_prop_rand_info: null - -# Safe set calculation -n_samples: 600 -n_samples_terminal_set: 10 -learn_terminal_set: False - -# Tau parameter for the calcuation of the RPI -tau: 0.95 - # Cost function cost_function: one_step_cost +mpsc_cost_horizon: 5 +decay_factor: 0.85 diff --git a/safe_control_gym/safety_filters/mpsc/mpsc_acados.py b/safe_control_gym/safety_filters/mpsc/mpsc_acados.py new file mode 100644 index 000000000..1846f85ca --- /dev/null +++ b/safe_control_gym/safety_filters/mpsc/mpsc_acados.py @@ -0,0 +1,259 @@ +'''Model Predictive Safety Certification using Acados.''' +import os +import shutil +from datetime import datetime + +import casadi as cs +import numpy as np +import scipy +from acados_template import AcadosModel, AcadosOcp, AcadosOcpSolver + +from safe_control_gym.controllers.mpc.mpc_utils import set_acados_constraint_bound +from safe_control_gym.safety_filters.mpsc.mpsc import MPSC +from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function +from safe_control_gym.utils.utils import timing + + +class MPSC_ACADOS(MPSC): + '''MPSC with full nonlinear model.''' + + def __init__( + self, + env_func, + horizon: int = 5, + q_mpc: list = [1], + r_mpc: list = [1], + integration_algo: str = 'rk4', + warmstart: bool = True, + additional_constraints: list = None, + use_terminal_set: bool = True, + soften_constraints: bool = False, + slack_cost: float = 1, + constraint_tol: float = 1e-6, + seed: int = 0, + use_RTI: bool = False, + cost_function: Cost_Function = Cost_Function.ONE_STEP_COST, + mpsc_cost_horizon: int = 5, + decay_factor: float = 0.85, + **kwargs + ): + '''Creates task and controller. + + Args: + env_func (Callable): function to instantiate task/environment. + horizon (int): mpc planning horizon. + q_mpc (list): diagonals of state cost weight. + r_mpc (list): diagonals of input/action cost weight. + warmstart (bool): if to initialize from previous iteration. + soften_constraints (bool): Formulate the constraints as soft constraints. + constraint_tol (float): Tolerance to add the the constraint as sometimes solvers are not exact. + seed (int): random seed. + use_RTI (bool): Real-time iteration for acados. + ''' + for k, v in locals().items(): + if k != 'self' and k != 'kwargs' and '__' not in k: + self.__dict__.update({k: v}) + + super().__init__( + env_func, + horizon, + q_mpc, + r_mpc, + integration_algo, + warmstart, + additional_constraints, + use_terminal_set, + cost_function, + mpsc_cost_horizon, + decay_factor, + **kwargs) + + # acados settings + self.use_RTI = use_RTI + + # Dynamics model. + self.setup_acados_model() + # Acados optimizer. 
+ self.setup_acados_optimizer() + + @timing + def reset(self): + '''Prepares for training or evaluation.''' + print('Resetting MPC') + super().reset() + if hasattr(self, 'acados_model'): + del self.acados_model + if hasattr(self, 'ocp'): + del self.ocp + if hasattr(self, 'acados_ocp_solver'): + del self.acados_ocp_solver + + # delete the generated c code directory + if os.path.exists('./c_generated_code'): + print('deleting the generated MPC c code directory') + shutil.rmtree('./c_generated_code') + assert not os.path.exists('./c_generated_code'), 'Failed to delete the generated c code directory' + + def setup_acados_model(self) -> AcadosModel: + '''Sets up symbolic model for acados.''' + acados_model = AcadosModel() + acados_model.x = self.model.x_sym + acados_model.u = self.model.u_sym + acados_model.name = self.env.NAME + + # set up rk4 (acados need symbolic expression of dynamics, not function) + k1 = self.model.fc_func(acados_model.x, acados_model.u) + k2 = self.model.fc_func(acados_model.x + self.dt / 2 * k1, acados_model.u) + k3 = self.model.fc_func(acados_model.x + self.dt / 2 * k2, acados_model.u) + k4 = self.model.fc_func(acados_model.x + self.dt * k3, acados_model.u) + f_disc = acados_model.x + self.dt / 6 * (k1 + 2 * k2 + 2 * k3 + k4) + + acados_model.disc_dyn_expr = f_disc + + # store meta information # NOTE: unit is missing + acados_model.x_labels = self.env.STATE_LABELS + acados_model.u_labels = self.env.ACTION_LABELS + acados_model.t_label = 'time' + # get current time stamp in $ymd_HMS format + current_time = datetime.now().strftime('%Y%m%d_%H%M%S') + acados_model.name = self.env.NAME + '_' + current_time + + self.acados_model = acados_model + + def set_dynamics(self): + pass + + def setup_casadi_optimizer(self): + pass + + def setup_acados_optimizer(self): + '''Sets up nonlinear optimization problem.''' + nx, nu = self.model.nx, self.model.nu + ny = nx + nu + ny_e = nx + + # create ocp object to formulate the OCP + ocp = AcadosOcp() + ocp.model = self.acados_model + + # set dimensions + ocp.dims.N = self.horizon # prediction horizon + + # set cost (NOTE: safe-control-gym uses quadratic cost) + ocp.cost.cost_type = 'LINEAR_LS' + ocp.cost.cost_type_e = 'LINEAR_LS' + + Q_mat = np.zeros((nx, nx)) + R_mat = np.eye(nu) + ocp.cost.W_e = np.zeros((nx, nx)) + ocp.cost.W = scipy.linalg.block_diag(Q_mat, R_mat) + + ocp.cost.Vx = np.zeros((ny, nx)) + ocp.cost.Vu = np.zeros((ny, nu)) + ocp.cost.Vu[nx:nx + nu, :] = np.eye(nu) + ocp.cost.Vx_e = np.eye(nx) + + # placeholder y_ref and y_ref_e (will be set in select_action) + ocp.cost.yref = np.zeros((ny, )) + ocp.cost.yref_e = np.zeros((ny_e, )) + + # set up solver options + ocp.solver_options.qp_solver = 'PARTIAL_CONDENSING_HPIPM' + ocp.solver_options.hessian_approx = 'GAUSS_NEWTON' + ocp.solver_options.integrator_type = 'DISCRETE' + ocp.solver_options.nlp_solver_type = 'SQP' if not self.use_RTI else 'SQP_RTI' + ocp.solver_options.nlp_solver_max_iter = 25 if not self.use_RTI else 1 + ocp.solver_options.tf = self.horizon * self.dt # prediction horizon + + ocp.constraints.x0 = self.model.X_EQ + + # Constraints + # general constraint expressions + state_constraint_expr_list = [] + input_constraint_expr_list = [] + for state_constraint in self.state_constraints_sym: + state_constraint_expr_list.append(state_constraint(ocp.model.x)) + for input_constraint in self.input_constraints_sym: + input_constraint_expr_list.append(input_constraint(ocp.model.u)) + + h_expr_list = state_constraint_expr_list + input_constraint_expr_list + h_expr = 
cs.vertcat(*h_expr_list) + h0_expr = cs.vertcat(*h_expr_list) + he_expr = cs.vertcat(*state_constraint_expr_list) # terminal constraints are only state constraints + # pass the constraints to the ocp object + ocp = self.processing_acados_constraints_expression(ocp, h0_expr, h_expr, he_expr) + + # slack costs for nonlinear constraints + if self.soften_constraints: + # slack variables for all constraints + ocp.constraints.Jsh = np.eye(2 * ny) + # slack penalty + ocp.cost.Zu = self.slack_cost * np.ones(2 * ny) + ocp.cost.Zl = self.slack_cost * np.ones(2 * ny) + ocp.cost.zl = self.slack_cost * np.ones(2 * ny) + ocp.cost.zu = self.slack_cost * np.ones(2 * ny) + + solver_json = 'acados_ocp_mpsf.json' + ocp_solver = AcadosOcpSolver(ocp, json_file=solver_json, generate=True, build=True) + + for stage in range(self.mpsc_cost_horizon): + ocp_solver.cost_set(stage, 'W', (self.cost_function.decay_factor**stage) * ocp.cost.W) + + for stage in range(self.mpsc_cost_horizon, self.horizon): + ocp_solver.cost_set(stage, 'W', 0 * ocp.cost.W) + + self.ocp_solver = ocp_solver + self.ocp = ocp + + def processing_acados_constraints_expression(self, ocp: AcadosOcp, h0_expr, h_expr, he_expr) -> AcadosOcp: + '''Preprocess the constraints to be compatible with acados. + Args: + ocp (AcadosOcp): acados ocp object + h0_expr (casadi expression): initial state constraints + h_expr (casadi expression): state and input constraints + he_expr (casadi expression): terminal state constraints + Returns: + ocp (AcadosOcp): acados ocp object with constraints set. + + An alternative way to set the constraints is to use bounded constraints of acados: + # bounded input constraints + idxbu = np.where(np.sum(self.env.constraints.input_constraints[0].constraint_filter, axis=0) != 0)[0] + ocp.constraints.Jbu = np.eye(nu) + ocp.constraints.lbu = self.env.constraints.input_constraints[0].lower_bounds + ocp.constraints.ubu = self.env.constraints.input_constraints[0].upper_bounds + ocp.constraints.idxbu = idxbu # active constraints dimension + ''' + + ub = {'h': set_acados_constraint_bound(h_expr, 'ub', self.constraint_tol), + 'h0': set_acados_constraint_bound(h0_expr, 'ub', self.constraint_tol), + 'he': set_acados_constraint_bound(he_expr, 'ub', self.constraint_tol), } + + lb = {'h': set_acados_constraint_bound(h_expr, 'lb'), + 'h0': set_acados_constraint_bound(h0_expr, 'lb'), + 'he': set_acados_constraint_bound(he_expr, 'lb'), } + + # make sure all the ub and lb are 1D numpy arrays + # (see: https://discourse.acados.org/t/infeasible-qps-when-using-nonlinear-casadi-constraint-expressions/1595/5?u=mxche) + for key in ub.keys(): + ub[key] = ub[key].flatten() if ub[key].ndim != 1 else ub[key] + lb[key] = lb[key].flatten() if lb[key].ndim != 1 else lb[key] + # check ub and lb dimensions + for key in ub.keys(): + assert ub[key].ndim == 1, f'ub[{key}] is not 1D numpy array' + assert lb[key].ndim == 1, f'lb[{key}] is not 1D numpy array' + assert ub['h'].shape == lb['h'].shape, 'h_ub and h_lb have different shapes' + + # pass the constraints to the ocp object + ocp.model.con_h_expr_0, ocp.model.con_h_expr, ocp.model.con_h_expr_e = \ + h0_expr, h_expr, he_expr + ocp.dims.nh_0, ocp.dims.nh, ocp.dims.nh_e = \ + h0_expr.shape[0], h_expr.shape[0], he_expr.shape[0] + # assign constraints upper and lower bounds + ocp.constraints.uh_0 = ub['h0'] + ocp.constraints.lh_0 = lb['h0'] + ocp.constraints.uh = ub['h'] + ocp.constraints.lh = lb['h'] + ocp.constraints.uh_e = ub['he'] + ocp.constraints.lh_e = lb['he'] + + return ocp diff --git 
a/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/abstract_cost.py b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/abstract_cost.py index 3980790c2..cc9db996a 100644 --- a/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/abstract_cost.py +++ b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/abstract_cost.py @@ -10,11 +10,15 @@ class MPSC_COST(ABC): def __init__(self, env: BenchmarkEnv = None, + mpsc_cost_horizon: int = 1, + decay_factor: float = 0.85, ): '''Initialize the MPSC Cost. Args: env (BenchmarkEnv): Environment for the task. + mpsc_cost_horizon (int): How many steps forward to check for constraint violations. + decay_factor (float): How much to discount future costs. ''' self.env = env @@ -22,6 +26,9 @@ def __init__(self, # Setup attributes. self.model = self.env.symbolic if env is not None else None + self.mpsc_cost_horizon = mpsc_cost_horizon + self.decay_factor = decay_factor + @abstractmethod def get_cost(self, opti_dict): '''Returns the cost function for the MPSC optimization in symbolic form. diff --git a/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py new file mode 100644 index 000000000..b6a1acc4e --- /dev/null +++ b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py @@ -0,0 +1,127 @@ +'''Precomputed Cost Function for Smooth MPSC.''' + +import numpy as np + +from safe_control_gym.controllers.pid.pid import PID +from safe_control_gym.envs.env_wrappers.vectorized_env.vec_env import VecEnv +from safe_control_gym.safety_filters.mpsc.mpsc_cost_function.abstract_cost import MPSC_COST + + +class PRECOMPUTED_COST(MPSC_COST): + '''Precomputed future states MPSC Cost Function.''' + + def __init__(self, + env, + mpsc_cost_horizon: int = 5, + decay_factor: float = 0.85, + output_dir: str = '.', + ): + '''Initialize the MPSC Cost. + + Args: + env (BenchmarkEnv): Environment for the task. + mpsc_cost_horizon (int): How many steps forward to check for constraint violations. + decay_factor (float): How much to discount future costs. + output_dir (str): Folder to write outputs. + ''' + + super().__init__(env, mpsc_cost_horizon, decay_factor) + + self.output_dir = output_dir + self.uncertified_controller = None + + def get_cost(self, opti_dict): + '''Returns the cost function for the MPSC optimization in symbolic form. + + Args: + opti_dict (dict): The dictionary of optimization variables. + + Returns: + cost (casadi symbolic expression): The symbolic cost function using casadi. + ''' + + opti = opti_dict['opti'] + next_u = opti_dict['next_u'] + u_L = opti_dict['u_L'] + v_var = opti_dict['v_var'] + + v_L = opti.parameter(self.model.nu, self.mpsc_cost_horizon) + + opti_dict['v_L'] = v_L + + cost = (u_L - next_u).T @ (u_L - next_u) + for h in range(1, self.mpsc_cost_horizon): + cost += (self.decay_factor**h) * (v_L[:, h] - v_var[:, h]).T @ (v_L[:, h] - v_var[:, h]) + + return cost + + def prepare_cost_variables(self, opti_dict, obs, iteration): + '''Prepares all the symbolic variable initial values for the next optimization. + + Args: + opti_dict (dict): The dictionary of optimization variables. + obs (ndarray): Current state/observation. + iteration (int): The current iteration, used for trajectory tracking. 
+ ''' + + opti = opti_dict['opti'] + v_L = opti_dict['v_L'] + u_L = opti_dict['u_L'] + + uncertified_action = opti.value(u_L, opti.initial()) + + expected_inputs = self.calculate_unsafe_path(obs, uncertified_action, iteration) + opti.set_value(v_L, expected_inputs) + + def calculate_unsafe_path(self, obs, uncertified_action, iteration): + '''Precomputes the likely actions the uncertified controller will take. + + Args: + obs (ndarray): Current state/observation. + uncertified_action (ndarray): The uncertified_controller's action. + iteration (int): The current iteration, used for trajectory tracking. + + Returns: + v_L (ndarray): The estimated future actions taken by the uncertified_controller. + ''' + + if self.uncertified_controller is None: + raise Exception('[ERROR] No underlying controller passed to P_MPSC') + + if isinstance(self.uncertified_controller.env, VecEnv): + uncert_env = self.uncertified_controller.env.envs[0] + else: + uncert_env = self.uncertified_controller.env + + v_L = np.zeros((self.model.nu, self.mpsc_cost_horizon)) + + if isinstance(self.uncertified_controller, PID): + self.uncertified_controller.save(f'{self.output_dir}/temp-data/saved_controller_curr.npy') + self.uncertified_controller.load(f'{self.output_dir}/temp-data/saved_controller_prev.npy') + + for h in range(self.mpsc_cost_horizon): + next_step = min(iteration + h, self.env.X_GOAL.shape[0] - 1) + # Concatenate goal info (goal state(s)) for RL + extended_obs = self.env.extend_obs(obs, next_step + 1) + + info = {'current_step': next_step} + + action = self.uncertified_controller.select_action(obs=extended_obs, info=info) + + if uncert_env.NORMALIZED_RL_ACTION_SPACE: + action = uncert_env.denormalize_action(action) + + action = np.clip(action, self.env.physical_action_bounds[0], self.env.physical_action_bounds[1]) + + if h == 0 and np.linalg.norm(uncertified_action - action) >= 0.001: + raise ValueError(f'[ERROR] Mismatch between unsafe controller and MPSC guess. Uncert: {uncertified_action}, Guess: {action}, Diff: {np.linalg.norm(uncertified_action - action)}.') + + v_L[:, h:h + 1] = action.reshape((self.model.nu, 1)) + + obs = np.squeeze(self.model.fd_func(x0=obs, p=action)['xf'].toarray()) + + if isinstance(self.uncertified_controller, PID): + self.uncertified_controller.load(f'{self.output_dir}/temp-data/saved_controller_curr.npy') + self.uncertified_controller.save(f'{self.output_dir}/temp-data/saved_controller_prev.npy') + + return v_L diff --git a/safe_control_gym/safety_filters/mpsc/mpsc_utils.py b/safe_control_gym/safety_filters/mpsc/mpsc_utils.py index 98bfd40be..0771e0f75 100644 --- a/safe_control_gym/safety_filters/mpsc/mpsc_utils.py +++ b/safe_control_gym/safety_filters/mpsc/mpsc_utils.py @@ -16,6 +16,7 @@ class Cost_Function(str, Enum): '''MPSC Cost functions enumeration class.''' ONE_STEP_COST = 'one_step_cost' # Default MPSC cost function. + PRECOMPUTED_COST = 'precomputed_cost' # Smooth cost based on precomputed future states def compute_RPI_set(Acl, @@ -58,8 +59,7 @@ def compute_RPI_set(Acl, except cp.SolverError: msg = '[ERROR] RPI Computation failed. Ensure you have the MOSEK solver. Otherwise, error unknown.' print(msg) - raise Exception(msg) from None - # exit() + raise Exception(msg) from None return P.value @@ -144,3 +144,16 @@ def get_trajectory_on_horizon(env, iteration, horizon): clipped_X_GOAL = env.X_GOAL return clipped_X_GOAL + + +def get_discrete_derivative(signal, ctrl_freq): + '''Calculates the discrete derivative of a signal. 
+ + Args: + signal (np.ndarray): An array of values. + + Returns: + discrete_derivative (np.ndarray): The discrete derivative of the signal. + ''' + discrete_derivative = (signal[1:, :] - signal[:-1, :]) * ctrl_freq + return discrete_derivative
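
Note (not part of the patch): a minimal standalone sketch of how the new get_discrete_derivative helper behaves. The function names and sample values below are illustrative only; it simply takes row-wise finite differences of a (T, nu) signal and scales them by the control frequency, returning a (T-1, nu) array of per-step rates.

import numpy as np

# Standalone copy of the helper added in mpsc_utils.py, for illustration.
def get_discrete_derivative(signal, ctrl_freq):
    '''First-order finite difference of a (T, n) signal, scaled by the control frequency.'''
    return (signal[1:, :] - signal[:-1, :]) * ctrl_freq

# Example: rate of change of a 2-input action sequence sampled at 60 Hz (hypothetical values).
ctrl_freq = 60.0
actions = np.array([[0.10, 0.00],
                    [0.12, 0.01],
                    [0.15, 0.01]])
rates = get_discrete_derivative(actions, ctrl_freq)
print(rates.shape)  # (2, 2): one row fewer than the input signal
print(rates[0])     # [1.2 0.6] -> (0.12 - 0.10) * 60, (0.01 - 0.00) * 60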