diff --git a/.gitignore b/.gitignore index 49e2bcb3f..31805cfd4 100644 --- a/.gitignore +++ b/.gitignore @@ -6,11 +6,13 @@ examples/mpsc/unsafe_rl_temp_data/ # examples/pid/*data/ # -Results/ +experiments/mpsc/results* +experiments/mpsc/models/rl_models* +# results/ z_docstring.py TODOs.md -# +# hpo_study*/ hp_study*/ comparisons/ @@ -155,10 +157,4 @@ dmypy.json .idea/ *c_generated_code/ -acados_ocp_nlp.json -gpmpc_acados_ocp_solver.json -gpmpc_update/ -temp -models/ -benchmarking_sim/quadrotor/acados_ocp.json -acados_ocp.json \ No newline at end of file +*acados_ocp*.json diff --git a/benchmarking_sim/quadrotor/config_overrides/mpc_acados_quadrotor_2D_attitude_tracking_100.yaml b/benchmarking_sim/quadrotor/config_overrides/mpc_acados_quadrotor_2D_attitude_tracking_100.yaml index 4c51188d5..eca0f59be 100644 --- a/benchmarking_sim/quadrotor/config_overrides/mpc_acados_quadrotor_2D_attitude_tracking_100.yaml +++ b/benchmarking_sim/quadrotor/config_overrides/mpc_acados_quadrotor_2D_attitude_tracking_100.yaml @@ -1,34 +1,10 @@ algo: mpc_acados algo_config: - # horizon: 40 - # r_mpc: - # - 0.8 - # - 0.8 - # q_mpc: - # - 5.0 - # - 0.1 - # - 5.0 - # - 0.1 - # - 0.5 - # - 0.001 - # horizon: 25 - q_mpc: [18, 0.1, 18, 0.1, 0.5, 0.01] - r_mpc: [15., 5.] - # r_mpc: [3., 3.] - # horizon: 50 - # q_mpc: - # - 7.920690650196039 - # - 0.0001 - # - 2.284725973739269 - # - 0.00010069955204926803 - # - 0.017010527262423716 - # - 0.0001 - # r_mpc: - # - 0.0001 - # - 0.010206543973281433 + horizon: 25 + q_mpc: [10, 0.1, 10, 0.1, 0.1, 0.001] + r_mpc: [0.1, 0.1] prior_info: - # prior_prop: null - prior_prop: + prior_prop: M: 0.033 beta_1: 18.11 beta_2: 3.68 @@ -40,9 +16,7 @@ algo_config: randomize_prior_prop: False prior_prop_rand_info: null warmstart: True - # use_lqr_gain_and_terminal_cost: True - # soft_constraints: True - # use_RTI: True output_dir: ./mpc_acados/results - + soft_constraints: True + soft_penalty: 1000.0 diff --git a/benchmarking_sim/quadrotor/config_overrides/quadrotor_2D_attitude_tracking.yaml b/benchmarking_sim/quadrotor/config_overrides/quadrotor_2D_attitude_tracking.yaml index 465e43ea3..f642f1586 100644 --- a/benchmarking_sim/quadrotor/config_overrides/quadrotor_2D_attitude_tracking.yaml +++ b/benchmarking_sim/quadrotor/config_overrides/quadrotor_2D_attitude_tracking.yaml @@ -3,7 +3,6 @@ task_config: ctrl_freq: 60 pyb_freq: 60 physics: dyn_si - # physics: pyb quad_type: 4 init_state: @@ -13,35 +12,9 @@ task_config: init_z_dot: 0 init_theta: 0 init_theta_dot: 0 - randomized_init: True + randomized_init: False randomized_inertial_prop: False - init_state_randomization_info: - init_x: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_x_dot: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_z: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_z_dot: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_theta: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_theta_dot: - distrib: 'uniform' - low: -0.05 - high: 0.05 - task: traj_tracking task_info: trajectory_type: figure8 @@ -49,8 +22,6 @@ task_config: trajectory_plane: 'xz' trajectory_position_offset: [0, 1.] 
trajectory_scale: 1.0 - # ilqr_ref: True - # ilqr_traj_data: /home/mingxuan/Repositories/scg_tsung/examples/lqr/ilqr_ref_traj.npy inertial_prop: M: 0.033 @@ -64,24 +35,21 @@ task_config: pitch_bias: 0.0 # in radian episode_len_sec: 11 - cost: quadratic + cost: rl_reward + obs_goal_horizon: 0 + + # RL Reward + rew_state_weight: [10, 0.1, 10, 0.1, 0.1, 0.001] + rew_act_weight: [0.1, 0.1] + rew_exponential: True constraints: - constraint_form: default_constraint constrained_variable: state + upper_bounds: [ 0.9, 2, 1.45, 2, 0.75, 3] + lower_bounds: [-0.9, -2, 0.55, -2, -0.75, -3] - constraint_form: default_constraint constrained_variable: input - # upper_bounds: [0.58212, 0.7] - # lower_bounds: [0.09702, -0.7] done_on_out_of_bound: True done_on_violation: False - disturbances: - # dynamics: # disturbance force in newton - # - disturbance_func: uniform - # low: 1. - # high: 1. - # mask: [1, 0, 0, 0] - observation: - - disturbance_func: white_noise - std: [5.6e-05, 1.5e-02, 2.9e-05, 8.0e-03, 1.3e-03, 3.5e-03] \ No newline at end of file diff --git a/benchmarking_sim/quadrotor/mb_experiment.py b/benchmarking_sim/quadrotor/mb_experiment.py index c4db39418..66fab3c0c 100644 --- a/benchmarking_sim/quadrotor/mb_experiment.py +++ b/benchmarking_sim/quadrotor/mb_experiment.py @@ -1,7 +1,7 @@ import os -import sys import pickle +import sys from collections import defaultdict from functools import partial @@ -10,17 +10,17 @@ from matplotlib.ticker import FormatStrFormatter from safe_control_gym.envs.benchmark_env import Task +from safe_control_gym.envs.gym_pybullet_drones.quadrotor import Quadrotor +from safe_control_gym.envs.gym_pybullet_drones.quadrotor_utils import QuadType from safe_control_gym.experiments.base_experiment import BaseExperiment -from safe_control_gym.experiments.epoch_experiments import EpochExperiment from safe_control_gym.utils.configuration import ConfigFactory +from safe_control_gym.utils.gpmpc_plotting import make_quad_plots from safe_control_gym.utils.registration import make from safe_control_gym.utils.utils import mkdirs, set_dir_from_config, timing -from safe_control_gym.envs.gym_pybullet_drones.quadrotor import Quadrotor -from safe_control_gym.envs.gym_pybullet_drones.quadrotor_utils import QuadType -from safe_control_gym.utils.gpmpc_plotting import make_quad_plots script_path = os.path.dirname(os.path.realpath(__file__)) + @timing def run(gui=False, n_episodes=1, n_steps=None, save_data=True): '''The main function running experiments for model-based methods. 
@@ -66,37 +66,37 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True): sys.argv[1:] = ['--algo', ALGO, '--task', agent, '--overrides', - f'./config_overrides/{SYS}_{TASK}{ADDITIONAL}.yaml', - f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml', + f'./config_overrides/{SYS}_{TASK}{ADDITIONAL}.yaml', + f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml', '--seed', '1', '--use_gpu', 'True', '--output_dir', f'./{ALGO}/results', - ] + ] else: - MPSC_COST='one_step_cost' + MPSC_COST = 'one_step_cost' assert ALGO != 'gp_mpc', 'Safety filter not supported for gp_mpc' assert os.path.exists(f'./config_overrides/{SAFETY_FILTER}_{SYS}_{TASK}_{PRIOR}.yaml'), f'./config_overrides/{SAFETY_FILTER}_{SYS}_{TASK}_{PRIOR}.yaml does not exist' sys.argv[1:] = ['--algo', ALGO, '--task', agent, '--safety_filter', SAFETY_FILTER, '--overrides', - f'./config_overrides/{SYS}_{TASK}{ADDITIONAL}.yaml', - f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml', - f'./config_overrides/{SAFETY_FILTER}_{SYS}_{TASK}_{PRIOR}.yaml', + f'./config_overrides/{SYS}_{TASK}{ADDITIONAL}.yaml', + f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml', + f'./config_overrides/{SAFETY_FILTER}_{SYS}_{TASK}_{PRIOR}.yaml', '--kv_overrides', f'sf_config.cost_function={MPSC_COST}', '--seed', '2', '--use_gpu', 'True', '--output_dir', f'./{ALGO}/results', - ] + ] fac = ConfigFactory() fac.add_argument('--func', type=str, default='train', help='main function to run.') fac.add_argument('--n_episodes', type=int, default=1, help='number of episodes to run.') # merge config and create output directory config = fac.merge() - if ALGO in ['gpmpc_acados', 'gp_mpc' , 'gpmpc_acados_TP']: + if ALGO in ['gpmpc_acados', 'gp_mpc', 'gpmpc_acados_TP']: num_data_max = config.algo_config.num_epochs * config.algo_config.num_samples config.output_dir = os.path.join(config.output_dir, PRIOR + '_' + repr(num_data_max)) - print('output_dir', config.algo_config.output_dir) + print('output_dir', config.algo_config.output_dir) set_dir_from_config(config) config.algo_config.output_dir = config.output_dir mkdirs(config.output_dir) @@ -110,22 +110,36 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True): random_env = env_func(gui=False) # Create controller. 
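+    # Note: the controller below is constructed with slightly tightened state and input
+    # constraint bounds; the nominal bounds are restored immediately after it is created.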
+ config.task_config.constraints[0].upper_bounds = [0.899, 1.99, 1.449, 1.99, 0.749, 2.99] + config.task_config.constraints[0].lower_bounds = [-0.899, -1.99, 0.551, -1.99, -0.749, -2.99] + config.task_config.constraints[1].upper_bounds = [0.59, 0.436] + config.task_config.constraints[1].lower_bounds = [0.113, -0.436] + + ctrl_env_func = partial(make, + config.task, + seed=config.seed, + **config.task_config + ) ctrl = make(config.algo, - env_func, + ctrl_env_func, seed=config.seed, **config.algo_config ) - + config.task_config.constraints[0].upper_bounds = [0.9, 2, 1.45, 2, 0.75, 3] + config.task_config.constraints[0].lower_bounds = [-0.9, -2, 0.55, -2, -0.75, -3] + config.task_config.constraints[1].upper_bounds = [0.59336579, 0.43633232] + config.task_config.constraints[1].lower_bounds = [0.11264675, -0.43633232] + # Setup safety filter if SAFETY_FILTER is not None: env_func_filter = partial(make, - config.task, - seed=config.seed, - **config.task_config) + config.task, + seed=config.seed, + **config.task_config) safety_filter = make(config.safety_filter, - env_func_filter, - seed=config.seed, - **config.sf_config) + env_func_filter, + seed=config.seed, + **config.sf_config) safety_filter.reset() all_trajs = defaultdict(list) @@ -140,19 +154,19 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True): static_train_env = env_func(gui=False, randomized_init=False, init_state=init_state) # Create experiment, train, and run evaluation - if SAFETY_FILTER is None: - if ALGO in ['gpmpc_acados', 'gp_mpc' , 'gpmpc_acados_TP']: + if SAFETY_FILTER is None: + if ALGO in ['gpmpc_acados', 'gp_mpc', 'gpmpc_acados_TP']: experiment = BaseExperiment(env=static_env, ctrl=ctrl, train_env=static_train_env) if config.algo_config.num_epochs == 1: print('Evaluating prior controller') elif config.algo_config.gp_model_path is not None: ctrl.load(config.algo_config.gp_model_path) else: - # manually launch training + # manually launch training # (NOTE: not using launch_training method since calling plotting before eval will break the eval) experiment.reset() train_runs, test_runs = ctrl.learn(env=static_train_env) - else: + else: experiment = BaseExperiment(env=static_env, ctrl=ctrl, train_env=static_train_env) experiment.launch_training() else: @@ -169,21 +183,20 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True): # plotting training and evaluation results # training - if ALGO in ['gpmpc_acados', 'gp_mpc' , 'gpmpc_acados_TP'] and \ + if ALGO in ['gpmpc_acados', 'gp_mpc', 'gpmpc_acados_TP'] and \ config.algo_config.gp_model_path is None and \ config.algo_config.num_epochs > 1: - if isinstance(static_env, Quadrotor): - make_quad_plots(test_runs=test_runs, - train_runs=train_runs, - trajectory=ctrl.traj.T, - dir=ctrl.output_dir) - plot_quad_eval(trajs_data['obs'][0], - trajs_data['action'][0], - # trajs_data['current_clipped_action'][0], - ctrl.env, + if isinstance(static_env, Quadrotor): + make_quad_plots(test_runs=test_runs, + train_runs=train_runs, + trajectory=ctrl.traj.T, + dir=ctrl.output_dir) + plot_quad_eval(trajs_data['obs'][0], + trajs_data['action'][0], + # trajs_data['current_clipped_action'][0], + ctrl.env, config.output_dir) - # Close environments static_env.close() static_train_env.close() @@ -211,6 +224,8 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True): print(f'pyb_client: {ctrl.env.PYB_CLIENT}') # def plot_quad_eval(state_stack, input_stack, clipped_action_stack, env, save_path=None): + + def plot_quad_eval(state_stack, input_stack, env, save_path=None): 
'''Plots the input and states to determine success. @@ -235,7 +250,7 @@ def plot_quad_eval(state_stack, input_stack, env, save_path=None): reference = np.tile(reference.reshape(1, model.nx), (plot_length, 1)) # Plot states - fig, axs = plt.subplots(model.nx, figsize=(8, model.nx*1)) + fig, axs = plt.subplots(model.nx, figsize=(8, model.nx * 1)) for k in range(model.nx): axs[k].plot(times, np.array(state_stack).transpose()[k, 0:plot_length], label='actual') axs[k].plot(times, reference.transpose()[k, 0:plot_length], color='r', label='desired') @@ -252,7 +267,7 @@ def plot_quad_eval(state_stack, input_stack, env, save_path=None): plt.savefig(os.path.join(save_path, 'state_trajectories.png')) # Plot inputs - _, axs = plt.subplots(model.nu, figsize=(8, model.nu*1)) + _, axs = plt.subplots(model.nu, figsize=(8, model.nu * 1)) if model.nu == 1: axs = [axs] for k in range(model.nu): @@ -270,9 +285,9 @@ def plot_quad_eval(state_stack, input_stack, env, save_path=None): # plot the figure-eight fig, axs = plt.subplots(1) - axs.plot(np.array(state_stack).transpose()[x_idx, 0:plot_length], + axs.plot(np.array(state_stack).transpose()[x_idx, 0:plot_length], np.array(state_stack).transpose()[z_idx, 0:plot_length], label='actual') - axs.plot(reference.transpose()[x_idx, 0:plot_length], + axs.plot(reference.transpose()[x_idx, 0:plot_length], reference.transpose()[z_idx, 0:plot_length], color='r', label='desired') axs.set_xlabel('x [m]') axs.set_ylabel('z [m]') @@ -285,10 +300,10 @@ def plot_quad_eval(state_stack, input_stack, env, save_path=None): print(f'Plots saved to {save_path}') fig, axs = plt.subplots(1) - axs.plot(np.array(state_stack).transpose()[x_idx, 0:plot_length], + axs.plot(np.array(state_stack).transpose()[x_idx, 0:plot_length], np.array(state_stack).transpose()[y_idx, 0:plot_length], label='actual') axs.plot(reference.transpose()[x_idx, 0:plot_length], - reference.transpose()[y_idx, 0:plot_length], color='r', label='desired') + reference.transpose()[y_idx, 0:plot_length], color='r', label='desired') axs.set_xlabel('x [m]') axs.set_ylabel('y [m]') axs.set_title('State path in x-y plane') @@ -297,9 +312,10 @@ def plot_quad_eval(state_stack, input_stack, env, save_path=None): if save_path is not None: plt.savefig(os.path.join(save_path, 'state_xy_path.png')) - + # plt.show() + def wrap2pi_vec(angle_vec): '''Wraps a vector of angles between -pi and pi. 
diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml index a5009ab8c..099581cf8 100644 --- a/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml +++ b/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml @@ -1,7 +1,8 @@ +algo: ppo algo_config: # model args - hidden_dim: 64 - activation: "relu" + hidden_dim: 128 + activation: tanh # loss args gamma: 0.98 @@ -18,48 +19,15 @@ algo_config: critic_lr: 0.001 # runner args - max_env_steps: 660000 - rollout_batch_size: 5 + max_env_steps: 2640000 + rollout_batch_size: 1 rollout_steps: 660 eval_batch_size: 10 # misc - log_interval: 13200 - save_interval: 660000 + log_interval: 66000 + save_interval: 1320000 num_checkpoints: 0 - eval_interval: 13200 + eval_interval: 66000 eval_save_best: True tensorboard: False - -# algo_config: -# # model args -# hidden_dim: 64 -# activation: "tanh" - -# # loss args -# gamma: 0.98 -# use_gae: True -# gae_lambda: 0.9 -# clip_param: 0.2 -# target_kl: 2.32e-2 -# entropy_coef: 0.09 - -# # optim args -# opt_epochs: 20 -# mini_batch_size: 256 -# actor_lr: 0.0012 -# critic_lr: 0.0012 - -# # runner args -# max_env_steps: 396000 -# rollout_batch_size: 5 -# rollout_steps: 660 -# eval_batch_size: 10 - -# # misc -# log_interval: 13200 -# save_interval: 660000 -# num_checkpoints: 0 -# eval_interval: 13200 -# eval_save_best: True -# tensorboard: False diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml index 7cc1787d7..4110ab495 100644 --- a/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml +++ b/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml @@ -14,56 +14,8 @@ task_config: init_z_dot: 0 init_theta: 0 init_theta_dot: 0 - randomized_init: True - randomized_inertial_prop: True - - init_state_randomization_info: - init_x: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_x_dot: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_z: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_z_dot: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_theta: - distrib: 'uniform' - low: -0.05 - high: 0.05 - init_theta_dot: - distrib: 'uniform' - low: -0.05 - high: 0.05 - - inertial_prop_randomization_info: - beta_1: # Nominal: 18.11 - distrib: 'uniform' - low: -4 - high: 4 - beta_2: # Nominal: 3.68 - distrib: 'uniform' - low: -0.7 - high: 0.7 - alpha_1: # Nominal: -140.8 - distrib: 'uniform' - low: -5 - high: 10 - alpha_2: # Nominal: -13.4 - distrib: 'uniform' - low: -3 - high: 3 - alpha_3: # Nominal: 124.8 - distrib: 'uniform' - low: -5 - high: 5 + randomized_init: False + randomized_inertial_prop: False task: traj_tracking task_info: @@ -82,35 +34,17 @@ task_config: obs_goal_horizon: 1 # RL Reward - rew_state_weight: [10., .1, 10., .1, .1, 0.001] - rew_act_weight: [.1, .1] + rew_state_weight: [10, 0.1, 10, 0.1, 0.1, 0.001] + rew_act_weight: [0.1, 0.1] rew_exponential: True - disturbances: - dynamics: - - disturbance_func: white_noise - std: 0.05 - observation: - - disturbance_func: white_noise - std: [5.6e-05, 1.5e-02, 2.9e-05, 8.0e-03, 1.3e-03, 3.6e-01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - action: - - disturbance_func: impulse - magnitude: 0.01 - step_offset: 2 - duration: 1 - decary_rate: 1 - constraints: - constraint_form: default_constraint 
constrained_variable: state - # upper_bounds: [2, 1, 2, 1, 0.2, 2.5] - # lower_bounds: [-2, -1, 0, -1, -0.2, -2.5] + upper_bounds: [ 0.9, 2, 1.45, 2, 0.75, 3] + lower_bounds: [-0.9, -2, 0.55, -2, -0.75, -3] - constraint_form: default_constraint constrained_variable: input - upper_bounds: [0.58212, 0.7] - lower_bounds: [0.09702, -0.7] - # upper_bounds: [0.47628, 0.4] - # lower_bounds: [0.079, -0.4] done_on_out_of_bound: True done_on_violation: False diff --git a/examples/rl/rl_experiment.sh b/examples/rl/rl_experiment.sh index 48a8f9c65..fc8a3269c 100755 --- a/examples/rl/rl_experiment.sh +++ b/examples/rl/rl_experiment.sh @@ -34,11 +34,8 @@ do ./config_overrides/${SYS}/${ALGO}_${SYS}.yaml \ --kv_overrides \ algo_config.training=False \ - task_config.randomized_init=True \ task_config.task_info.num_cycles=2 \ task_config.task_info.ilqr_ref=False \ task_config.task_info.ilqr_traj_data='../lqr/ilqr_ref_traj.npy' \ task_config.noise_scale=${NS} -# --pretrain_path ./Results/Benchmark_data/ilqr_ref/${SYS}_${ALGO}_data/${SEED} done -#done \ No newline at end of file diff --git a/examples/rl/train_rl_model.sh b/examples/rl/train_rl_model.sh index 8e74f590c..1d17f7a55 100755 --- a/examples/rl/train_rl_model.sh +++ b/examples/rl/train_rl_model.sh @@ -54,11 +54,9 @@ do --overrides \ ./config_overrides/${SYS}/${ALGO}_${SYS}.yaml \ ./config_overrides/${SYS}/${SYS}_${TASK}.yaml \ - --output_dir ./Results/${EXP_NAME}/${SYS}_${ALGO}_data/${SEED}/ \ + --output_dir ./results/${EXP_NAME}/${SYS}_${ALGO}_data/${SEED}/ \ --seed ${SEED} \ - --use_gpu \ - --kv_overrides \ - task_config.randomized_init=True + --use_gpu done # Move the newly trained unsafe model. diff --git a/experiments/mpsc/config_overrides/mpsc_acados_quadrotor_2D_attitude.yaml b/experiments/mpsc/config_overrides/mpsc_acados_quadrotor_2D_attitude.yaml new file mode 100644 index 000000000..21146f069 --- /dev/null +++ b/experiments/mpsc/config_overrides/mpsc_acados_quadrotor_2D_attitude.yaml @@ -0,0 +1,32 @@ +safety_filter: mpsc_acados +sf_config: + # LQR controller parameters + q_mpc: [18, 0.1, 18, 0.5, 0.5, 0.0001] + r_mpc: [3., 3.] 
+ + prior_info: + prior_prop: + beta_1: 18.11298 + beta_2: 3.6800 + beta_3: 0 + alpha_1: -140.8 + alpha_2: -13.4 + alpha_3: 124.8 + randomize_prior_prop: False + prior_prop_rand_info: null + + # MPC Parameters + use_acados: True + horizon: 25 + warmstart: True + integration_algo: rk4 + use_terminal_set: False + + # Cost function + cost_function: one_step_cost + mpsc_cost_horizon: 5 + decay_factor: 0.85 + + # Softening + soften_constraints: True + slack_cost: 1000.0 diff --git a/experiments/mpsc/config_overrides/ppo_quadrotor_2D_attitude.yaml b/experiments/mpsc/config_overrides/ppo_quadrotor_2D_attitude.yaml new file mode 100644 index 000000000..c9a8f8f93 --- /dev/null +++ b/experiments/mpsc/config_overrides/ppo_quadrotor_2D_attitude.yaml @@ -0,0 +1,39 @@ +algo: ppo +algo_config: + # model args + hidden_dim: 128 + activation: tanh + + # loss args + gamma: 0.98 + use_gae: True + gae_lambda: 0.92 + clip_param: 0.2 + target_kl: 1.0e-2 + entropy_coef: 0.005 + + # optim args + opt_epochs: 20 + mini_batch_size: 256 + actor_lr: 0.001 + critic_lr: 0.001 + + # runner args + max_env_steps: 2640000 + rollout_batch_size: 1 + rollout_steps: 660 + eval_batch_size: 10 + + # misc + log_interval: 66000 + save_interval: 1320000 + num_checkpoints: 0 + eval_interval: 66000 + eval_save_best: True + tensorboard: False + + # safety filter + filter_train_actions: False + penalize_sf_diff: False + sf_penalty: 1 + use_safe_reset: False diff --git a/experiments/mpsc/config_overrides/quadrotor_2D_attitude_tracking.yaml b/experiments/mpsc/config_overrides/quadrotor_2D_attitude_tracking.yaml new file mode 100644 index 000000000..4e127affa --- /dev/null +++ b/experiments/mpsc/config_overrides/quadrotor_2D_attitude_tracking.yaml @@ -0,0 +1,48 @@ +task_config: + info_in_reset: True + ctrl_freq: 60 + pyb_freq: 60 + physics: dyn_si + quad_type: 4 + + init_state: + init_x: 0 + init_x_dot: 0 + init_z: 1.0 + init_z_dot: 0 + init_theta: 0 + init_theta_dot: 0 + randomized_init: False + randomized_inertial_prop: False + + task: traj_tracking + task_info: + trajectory_type: figure8 + num_cycles: 2 + trajectory_plane: 'xz' + trajectory_position_offset: [0, 1.] + trajectory_scale: 1.0 + + inertial_prop: + M: 0.033 + Iyy: 1.4e-05 + + episode_len_sec: 11 + cost: rl_reward + obs_goal_horizon: 1 + + # RL Reward + rew_state_weight: [10, 0.1, 10, 0.1, 0.1, 0.001] + rew_act_weight: [0.1, 0.1] + rew_exponential: True + + constraints: + - constraint_form: default_constraint + constrained_variable: state + upper_bounds: [ 0.9, 2, 1.45, 2, 0.75, 3] + lower_bounds: [-0.9, -2, 0.55, -2, -0.75, -3] + - constraint_form: default_constraint + constrained_variable: input + + done_on_out_of_bound: True + done_on_violation: False diff --git a/experiments/mpsc/mpsc_experiment.py b/experiments/mpsc/mpsc_experiment.py new file mode 100644 index 000000000..a82b0329b --- /dev/null +++ b/experiments/mpsc/mpsc_experiment.py @@ -0,0 +1,173 @@ +'''This script tests the MPSC safety filter implementation.''' + +import pickle +import shutil +from functools import partial + +import matplotlib.pyplot as plt +import numpy as np + +from safe_control_gym.experiments.base_experiment import BaseExperiment, MetricExtractor +from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function +from safe_control_gym.utils.configuration import ConfigFactory +from safe_control_gym.utils.registration import make + + +def run(plot=False, training=False, model='ppo'): + '''Main function to run MPSC experiments. 
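+
+    Args:
+        plot (bool): Whether to plot the uncertified vs. certified trajectories.
+        training (bool): Whether to learn and save the MPSC parameters instead of running the evaluation.
+        model (str): Name of the pretrained RL model to load from ./models/rl_models/.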
+ + Returns: + X_GOAL (np.ndarray): The goal (stabilization or reference trajectory) of the experiment. + uncert_results (dict): The results of the uncertified experiment. + uncert_metrics (dict): The metrics of the uncertified experiment. + cert_results (dict): The results of the certified experiment. + cert_metrics (dict): The metrics of the certified experiment. + ''' + + # Create the configuration dictionary. + fac = ConfigFactory() + config = fac.merge() + config.algo_config['training'] = False + config.task_config['done_on_violation'] = False + config.task_config['randomized_init'] = False + + system = 'quadrotor_2D_attitude' + + # Create an environment + env_func = partial(make, + config.task, + **config.task_config) + env = env_func() + + config.task_config.constraints[0].upper_bounds = [0.899, 1.99, 1.449, 1.99, 0.749, 2.99] + config.task_config.constraints[0].lower_bounds = [-0.899, -1.99, 0.551, -1.99, -0.749, -2.99] + config.task_config.constraints[1].upper_bounds = [0.59, 0.436] + config.task_config.constraints[1].lower_bounds = [0.113, -0.436] + env_func = partial(make, + config.task, + **config.task_config) + + # Setup controller. + ctrl = make(config.algo, + env_func, + **config.algo_config, + output_dir='./temp') + + if config.algo in ['ppo', 'sac']: + # Load state_dict from trained. + ctrl.load(f'./models/rl_models/{model}/model_best.pt') + + # Remove temporary files and directories + shutil.rmtree('./temp', ignore_errors=True) + + # Run without safety filter + experiment = BaseExperiment(env, ctrl) + uncert_results, uncert_metrics = experiment.run_evaluation(n_episodes=1) + ctrl.reset() + + # Setup MPSC. + safety_filter = make(config.safety_filter, + env_func, + **config.sf_config) + safety_filter.reset() + + if config.sf_config.cost_function == Cost_Function.PRECOMPUTED_COST: + safety_filter.cost_function.uncertified_controller = ctrl + safety_filter.cost_function.output_dir = '.' 
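+    # Either learn and save the MPSC parameters (training=True) or load previously saved ones
+    # from ./models/mpsc_parameters/ before running the certified evaluation.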
+ + if training is True: + train_env = env_func(randomized_init=True, + init_state=None, + normalized_rl_action_space=False, + ) + safety_filter.learn(env=train_env) + safety_filter.save(path=f'./models/mpsc_parameters/{config.safety_filter}_{system}.pkl') + raise SystemExit + else: + safety_filter.load(path=f'./models/mpsc_parameters/{config.safety_filter}_{system}.pkl') + + # Run with safety filter + experiment = BaseExperiment(env, ctrl, safety_filter=safety_filter) + cert_results, cert_metrics = experiment.run_evaluation(n_episodes=1) + experiment.close() + safety_filter.close() + + if plot is True: + fig = plt.figure() + ax = fig.add_subplot(1, 1, 1) + ax.plot(uncert_results['state'][0][:, 0], uncert_results['state'][0][:, 2], label='Uncertified', color='red') + ax.plot(cert_results['state'][0][:, 0], cert_results['state'][0][:, 2], label='Certified', color='green') + ax.plot(env.X_GOAL[:, 0], env.X_GOAL[:, 2], label='Reference', color='black', linestyle='dashdot') + rec1 = plt.Rectangle((0.9, 0), 2, 2, color='#f1d6d6') + rec2 = plt.Rectangle((-1.9, 0), 1, 2, color='#f1d6d6') + ax.add_patch(rec1) + ax.add_patch(rec2) + rec3 = plt.Rectangle((-0.9, 1.45), 0.975 * 2, 2, color='#f1d6d6') + rec4 = plt.Rectangle((-0.9, -0.45), 0.975 * 2, 1, color='#f1d6d6') + ax.add_patch(rec3) + ax.add_patch(rec4) + plt.xlim(-1.1, 1.1) + plt.ylim(0.45, 1.55) + plt.xlabel('x [m]') + plt.ylabel('z [m]') + plt.legend() + plt.show() + + elapsed_time_uncert = uncert_results['timestamp'][0][-1] - uncert_results['timestamp'][0][0] + elapsed_time_cert = cert_results['timestamp'][0][-1] - cert_results['timestamp'][0][0] + + mpsc_results = cert_results['safety_filter_data'][0] + corrections = mpsc_results['correction'][0] * 10.0 > np.linalg.norm(cert_results['current_physical_action'][0] - safety_filter.U_EQ[0], axis=1) + corrections = np.append(corrections, False) + + print('Total Uncertified (s):', elapsed_time_uncert) + print('Total Certified Time (s):', elapsed_time_cert) + print('Number of Corrections:', np.sum(corrections)) + print('Sum of Corrections:', np.linalg.norm(mpsc_results['correction'][0])) + print('Max Correction:', np.max(np.abs(mpsc_results['correction'][0]))) + print('Number of Feasible Iterations:', np.sum(mpsc_results['feasible'][0])) + print('Total Number of Iterations:', uncert_metrics['average_length']) + print('Total Number of Certified Iterations:', cert_metrics['average_length']) + print('Number of Violations:', uncert_metrics['average_constraint_violation']) + print('Number of Certified Violations:', cert_metrics['average_constraint_violation']) + + return env.X_GOAL, uncert_results, uncert_metrics, cert_results, cert_metrics + + +def run_multiple_models(plot, all_models): + '''Runs all models at every saved starting point.''' + + fac = ConfigFactory() + config = fac.merge() + + for model in all_models: + print(model) + for i in range(25 if not plot else 1): + X_GOAL, uncert_results, _, cert_results, _ = run(plot=plot, training=False, model=model) + if i == 0: + all_uncert_results, all_cert_results = uncert_results, cert_results + else: + for key in all_cert_results.keys(): + if key in all_uncert_results: + all_uncert_results[key].append(uncert_results[key][0]) + all_cert_results[key].append(cert_results[key][0]) + + met = MetricExtractor() + uncert_metrics = met.compute_metrics(data=all_uncert_results) + cert_metrics = met.compute_metrics(data=all_cert_results) + + all_results = {'uncert_results': all_uncert_results, + 'uncert_metrics': uncert_metrics, + 'cert_results': 
all_cert_results, + 'cert_metrics': cert_metrics, + 'config': config, + 'X_GOAL': X_GOAL} + + if not plot: + with open(f'./results_mpsc/{model}.pkl', 'wb') as f: + pickle.dump(all_results, f) + + +if __name__ == '__main__': + # run(plot=True, training=False, model='none') + run_multiple_models(plot=True, all_models=['mpsf']) diff --git a/experiments/mpsc/mpsc_experiment.sh b/experiments/mpsc/mpsc_experiment.sh new file mode 100755 index 000000000..7b0bf4358 --- /dev/null +++ b/experiments/mpsc/mpsc_experiment.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +SYS='quadrotor_2D_attitude' +TASK='tracking' +ALGO='ppo' + +SAFETY_FILTER='mpsc_acados' +# MPSC_COST='one_step_cost' +MPSC_COST='precomputed_cost' +MPSC_COST_HORIZON=25 +DECAY_FACTOR=1 + +python3 ./mpsc_experiment.py \ + --task quadrotor \ + --algo ${ALGO} \ + --safety_filter ${SAFETY_FILTER} \ + --overrides \ + ./config_overrides/${SYS}_${TASK}.yaml \ + ./config_overrides/${ALGO}_${SYS}.yaml \ + ./config_overrides/${SAFETY_FILTER}_${SYS}.yaml \ + --kv_overrides \ + sf_config.cost_function=${MPSC_COST} \ + sf_config.mpsc_cost_horizon=${MPSC_COST_HORIZON} \ + sf_config.decay_factor=${DECAY_FACTOR} diff --git a/experiments/mpsc/plotting_results.py b/experiments/mpsc/plotting_results.py new file mode 100644 index 000000000..d2e9ecd89 --- /dev/null +++ b/experiments/mpsc/plotting_results.py @@ -0,0 +1,507 @@ +'''This script analyzes and plots the results from MPSC experiments.''' + +import pickle +import sys + +import matplotlib.pyplot as plt +import numpy as np + +from safe_control_gym.experiments.base_experiment import MetricExtractor +from safe_control_gym.safety_filters.mpsc.mpsc_utils import get_discrete_derivative +from safe_control_gym.utils.plotting import load_from_logs + +plot = True # Saves figure if False + +U_EQ = np.array([0.3, 0]) + +met = MetricExtractor() +met.verbose = False + + +def load_all_models(system, task, algo): + '''Loads the results of every experiment. + + Args: + system (str): The system to be plotted. + task (str): The task to be plotted (either 'stab' or 'track'). + algo (str): The controller to be plotted. + + Returns: + all_results (dict): A dictionary containing all the results. + ''' + + all_results = {} + + for model in ordered_models: + with open(f'./results_mpsc/{model}.pkl', 'rb') as f: + all_results[model] = pickle.load(f) + + return all_results + + +def extract_magnitude_of_corrections(results_data): + '''Extracts the magnitude of corrections from an experiment's data. + + Args: + results_data (dict): A dictionary containing all the data from the desired experiment. + + Returns: + magn_of_corrections (list): The list of magnitude of corrections for all experiments. + ''' + + magn_of_corrections = [np.linalg.norm(mpsc_results['correction'][0]) for mpsc_results in results_data['cert_results']['safety_filter_data']] + return magn_of_corrections + + +def extract_max_correction(results_data): + '''Extracts the max correction from an experiment's data. + + Args: + results_data (dict): A dictionary containing all the data from the desired experiment. + + Returns: + max_corrections (list): The list of max corrections for all experiments. + ''' + max_corrections = [np.max(np.abs(mpsc_results['correction'][0])) for mpsc_results in results_data['cert_results']['safety_filter_data']] + + return max_corrections + + +def extract_number_of_corrections(results_data): + '''Extracts the number of corrections from an experiment's data. 
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+
+    Returns:
+        num_corrections (list): The list of the number of corrections for all experiments.
+    '''
+    num_corrections = [np.sum(mpsc_results['correction'][0] * 10.0 > np.linalg.norm(results_data['cert_results']['current_clipped_action'][i] - U_EQ, axis=1)) for i, mpsc_results in enumerate(results_data['cert_results']['safety_filter_data'])]
+    return num_corrections
+
+
+def extract_feasible_iterations(results_data):
+    '''Extracts the number of feasible iterations from an experiment's data.
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+
+    Returns:
+        feasible_iterations (list): The list of the number of feasible iterations for all experiments.
+    '''
+    feasible_iterations = [np.sum(mpsc_results['feasible'][0]) for mpsc_results in results_data['cert_results']['safety_filter_data']]
+    return feasible_iterations
+
+
+def extract_rmse(results_data, certified=True):
+    '''Extracts the RMSEs from an experiment's data.
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+        certified (bool): Whether to extract the certified data or uncertified data.
+
+    Returns:
+        rmse (list): The list of RMSEs for all experiments.
+    '''
+    if certified:
+        met.data = results_data['cert_results']
+        rmse = np.asarray(met.get_episode_rmse())
+    else:
+        met.data = results_data['uncert_results']
+        rmse = np.asarray(met.get_episode_rmse())
+    return rmse
+
+
+def extract_length(results_data, certified=True):
+    '''Extracts the lengths from an experiment's data.
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+        certified (bool): Whether to extract the certified data or uncertified data.
+
+    Returns:
+        length (list): The list of lengths for all experiments.
+    '''
+    if certified:
+        met.data = results_data['cert_results']
+        length = np.asarray(met.get_episode_lengths())
+    else:
+        met.data = results_data['uncert_results']
+        length = np.asarray(met.get_episode_lengths())
+    return length
+
+
+def extract_simulation_time(results_data, certified=True):
+    '''Extracts the simulation time from an experiment's data.
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+        certified (bool): Whether to extract the certified data or uncertified data.
+
+    Returns:
+        sim_time (list): The list of simulation times for all experiments.
+    '''
+    if certified:
+        sim_time = [timestamp[-1] - timestamp[0] for timestamp in results_data['cert_results']['timestamp']]
+    else:
+        sim_time = [timestamp[-1] - timestamp[0] for timestamp in results_data['uncert_results']['timestamp']]
+
+    return sim_time
+
+
+def extract_constraint_violations(results_data, certified=True):
+    '''Extracts the number of constraint violations from an experiment's data.
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+        certified (bool): Whether to extract the certified data or uncertified data.
+
+    Returns:
+        num_violations (list): The list of number of constraint violations for all experiments.
+    '''
+    if certified:
+        met.data = results_data['cert_results']
+        num_violations = np.asarray(met.get_episode_constraint_violation_steps())
+    else:
+        met.data = results_data['uncert_results']
+        num_violations = np.asarray(met.get_episode_constraint_violation_steps())
+
+    return num_violations
+
+
+def extract_rate_of_change(results_data, certified=True, order=1, mode='input'):
+    '''Extracts the rate of change of a signal from an experiment's data.
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+        certified (bool): Whether to extract the certified data or uncertified data.
+        order (int): Either 1 or 2, denoting the order of the derivative.
+        mode (string): Either 'input' or 'correction', denoting which signal to use.
+
+    Returns:
+        roc (list): The list of rate of changes.
+    '''
+    n = min(results_data['cert_results']['current_clipped_action'][0].shape)
+
+    if mode == 'input':
+        if certified:
+            all_signals = [actions - U_EQ for actions in results_data['cert_results']['current_clipped_action']]
+        else:
+            all_signals = [actions - U_EQ for actions in results_data['uncert_results']['current_clipped_action']]
+    elif mode == 'correction':
+        all_signals = [np.squeeze(mpsc_results['uncertified_action'][0]) - np.squeeze(mpsc_results['certified_action'][0]) for mpsc_results in results_data['cert_results']['safety_filter_data']]
+
+    total_derivatives = []
+    for signal in all_signals:
+        if n == 1:
+            ctrl_freq = 15
+            if mode == 'correction':
+                signal = np.atleast_2d(signal).T
+        elif n > 1:
+            ctrl_freq = 50
+        derivative = get_discrete_derivative(signal, ctrl_freq)
+        if order == 2:
+            derivative = get_discrete_derivative(derivative, ctrl_freq)
+        total_derivatives.append(np.linalg.norm(derivative, 'fro'))
+
+    return total_derivatives
+
+
+def extract_reward(results_data, certified):
+    '''Extracts the episode returns from an experiment's data.
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+        certified (bool): Whether to extract the certified data or uncertified data.
+
+    Returns:
+        returns (list): The list of episode returns for all experiments.
+    '''
+    if certified:
+        met.data = results_data['cert_results']
+        returns = np.asarray(met.get_episode_returns())
+    else:
+        met.data = results_data['uncert_results']
+        returns = np.asarray(met.get_episode_returns())
+
+    return returns
+
+
+def extract_failed(results_data, certified):
+    '''Extracts the percent failed from an experiment's data.
+
+    Args:
+        results_data (dict): A dictionary containing all the data from the desired experiment.
+        certified (bool): Whether to extract the certified data or uncertified data.
+
+    Returns:
+        failed (list): The percent failed.
+    '''
+    if certified:
+        data = results_data['cert_results']
+    else:
+        data = results_data['uncert_results']
+
+    failed = [data['info'][i][-1]['out_of_bounds'] for i in range(len(data['info']))]
+
+    return [np.mean(failed)]
+
+
+def plot_model_comparisons(system, task, algo, data_extractor):
+    '''Plots a box-plot comparison of the data extracted by data_extractor for every model.
+
+    Args:
+        system (str): The system to be plotted.
+        task (str): The task to be plotted (either 'stab' or 'track').
+        algo (str): The controller to be plotted.
+        data_extractor (func): The function which extracts the desired data.
+ ''' + + all_results = load_all_models(system, task, algo) + + fig = plt.figure(figsize=(16.0, 10.0)) + ax = fig.add_subplot(111) + + labels = ordered_models + + data = [] + + for model in ordered_models: + exp_data = all_results[model] + data.append(data_extractor(exp_data)) + + ylabel = data_extractor.__name__.replace('extract_', '').replace('_', ' ').title() + ax.set_ylabel(ylabel, weight='bold', fontsize=45, labelpad=10) + + x = np.arange(1, len(labels) + 1) + ax.set_xticks(x, labels, weight='bold', fontsize=15, rotation=30, ha='right') + + medianprops = dict(linestyle='--', linewidth=2.5, color='black') + bplot = ax.boxplot(data, patch_artist=True, labels=labels, medianprops=medianprops, widths=[0.75] * len(labels), showfliers=False) + + for patch, color in zip(bplot['boxes'], colors.values()): + patch.set_facecolor(color) + + fig.tight_layout() + ax.set_ylim(ymin=0) + + ax.yaxis.grid(True) + + if plot is True: + plt.show() + else: + image_suffix = data_extractor.__name__.replace('extract_', '') + fig.savefig(f'./results_mpsc/{image_suffix}.png', dpi=300) + plt.close() + + +def normalize_actions(actions): + '''Normalizes an array of actions. + + Args: + actions (ndarray): The actions to be normalized. + + Returns: + normalized_actions (ndarray): The normalized actions. + ''' + if system_name == 'cartpole': + action_scale = 10.0 + normalized_actions = actions / action_scale + elif system_name == 'quadrotor_2D': + hover_thrust = 0.1323 + norm_act_scale = 0.1 + normalized_actions = (actions / hover_thrust - 1.0) / norm_act_scale + else: + hover_thrust = 0.06615 + norm_act_scale = 0.1 + normalized_actions = (actions / hover_thrust - 1.0) / norm_act_scale + + return normalized_actions + + +def plot_all_logs(system, task, algo): + '''Plots comparative plots of all the logs. + + Args: + system (str): The system to be plotted. + task (str): The task to be plotted (either 'stab' or 'track'). + algo (str): The controller to be plotted. + ''' + all_results = {} + + for model in ordered_models: + all_results[model] = [] + all_results[model].append(load_from_logs(f'./models/rl_models/{model}/logs/')) + + for key in all_results[ordered_models[0]][0].keys(): + if key == 'stat_eval/ep_return': + plot_log(key, all_results) + if key == 'stat/constraint_violation': + plot_log(key, all_results) + + +def plot_log(key, all_results): + '''Plots a comparative plot of the log 'key'. + + Args: + key (str): The name of the log to be plotted. + all_results (dict): A dictionary of all the logged results for all models. 
+ ''' + fig = plt.figure(figsize=(16.0, 10.0)) + ax = fig.add_subplot(111) + + labels = ordered_models + + for model, label in zip(ordered_models, labels): + x = all_results[model][0][key][1] / 1000 + all_data = np.array([values[key][3] for values in all_results[model]]) + ax.plot(x, np.mean(all_data, axis=0), label=label, color=colors[model]) + ax.fill_between(x, np.min(all_data, axis=0), np.max(all_data, axis=0), alpha=0.3, edgecolor=colors[model], facecolor=colors[model]) + + ax.set_ylabel(key, weight='bold', fontsize=45, labelpad=10) + ax.set_xlabel('Training Episodes') + ax.legend() + + fig.tight_layout() + ax.yaxis.grid(True) + + if plot is True: + plt.show() + else: + image_suffix = key.replace('/', '__') + fig.savefig(f'./results_mpsc/{image_suffix}.png', dpi=300) + plt.close() + + +def benchmark_plot(system, task, algo): + all_results = load_all_models(system, task, algo) + X_GOAL = all_results['mpsf']['X_GOAL'] + + uncert = all_results['none']['uncert_results'] + mpsf = all_results['mpsf']['cert_results'] + none = all_results['none']['cert_results'] + mpc = all_results['mpc_acados']['trajs_data'] + + for i in [0]: + print('Uncert') + met.data = uncert + print('num_violations', calculate_state_violations(uncert, i)) + print('exp_return', np.asarray(met.get_episode_returns())[i]) + + print('\nNone') + met.data = none + print('num_violations', calculate_state_violations(none, i)) + print('exp_return', np.asarray(met.get_episode_returns())[i]) + + print('\nMPSF') + met.data = mpsf + print('num_violations', calculate_state_violations(mpsf, i)) + print('exp_return', np.asarray(met.get_episode_returns())[i]) + print('---------') + + print('\nMPC') + met.data = mpc + print('num_violations', calculate_state_violations(mpc, i)) + print('exp_return', np.asarray(met.get_episode_returns())[i]) + print('---------') + + fig = plt.figure() + ax = fig.add_subplot(1, 1, 1) + ax.plot(uncert['state'][i][:, 0], uncert['state'][i][:, 2], label='Uncertified', color='red') + ax.plot(none['state'][i][:, 0], none['state'][i][:, 2], label='Certified (Std.)', color='cornflowerblue') + ax.plot(mpsf['state'][i][:, 0], mpsf['state'][i][:, 2], label='Certified (Ours)', color='forestgreen') + ax.plot(mpc['state'][i][:, 0], mpc['state'][i][:, 2], label='MPC', color='plum') + ax.plot(X_GOAL[:, 0], X_GOAL[:, 2], label='Reference', color='black', linestyle='dashdot') + rec1 = plt.Rectangle((0.9, 0), 2, 2, color='#f1d6d6') + rec2 = plt.Rectangle((-1.9, 0), 1, 2, color='#f1d6d6') + ax.add_patch(rec1) + ax.add_patch(rec2) + rec3 = plt.Rectangle((-0.9, 1.45), 0.975 * 2, 2, color='#f1d6d6') + rec4 = plt.Rectangle((-0.9, -0.45), 0.975 * 2, 1, color='#f1d6d6') + ax.add_patch(rec3) + ax.add_patch(rec4) + plt.xlim(-1.1, 1.1) + plt.ylim(0.45, 1.55) + plt.xlabel('x [m]') + plt.ylabel('z [m]') + plt.legend() + plt.show() + + +def calculate_state_violations(data, i): + states = data['state'][i] + num_viols = np.sum(np.any(states[:, [0, 2]] > [0.9, 1.45], axis=1) | np.any(states[:, [0, 2]] < [-0.9, 0.55], axis=1)) + return num_viols + + +if __name__ == '__main__': + ordered_models = ['none', 'mpsf', 'mpc_acados'] + + colors = { + 'mpsf': 'royalblue', + 'none': 'plum', + } + + def extract_rate_of_change_of_inputs(results_data, certified=True): + return extract_rate_of_change(results_data, certified, order=1, mode='input') + + def extract_roc_cert(results_data, certified=True): + return extract_rate_of_change_of_inputs(results_data, certified) + + def extract_roc_uncert(results_data, certified=False): + return 
extract_rate_of_change_of_inputs(results_data, certified) + + def extract_rmse_cert(results_data, certified=True): + return extract_rmse(results_data, certified) + + def extract_rmse_uncert(results_data, certified=False): + return extract_rmse(results_data, certified) + + def extract_constraint_violations_cert(results_data, certified=True): + return extract_constraint_violations(results_data, certified) + + def extract_constraint_violations_uncert(results_data, certified=False): + return extract_constraint_violations(results_data, certified) + + def extract_reward_cert(results_data, certified=True): + return extract_reward(results_data, certified) + + def extract_reward_uncert(results_data, certified=False): + return extract_reward(results_data, certified) + + def extract_failed_cert(results_data, certified=True): + return extract_failed(results_data, certified) + + def extract_failed_uncert(results_data, certified=False): + return extract_failed(results_data, certified) + + def extract_length_cert(results_data, certified=True): + return extract_length(results_data, certified) + + def extract_length_uncert(results_data, certified=False): + return extract_length(results_data, certified) + + system_name = 'quadrotor_2D_attitude' + task_name = 'track' + algo_name = 'ppo' + if len(sys.argv) == 4: + system_name = sys.argv[1] + task_name = sys.argv[2] + algo_name = sys.argv[3] + + benchmark_plot(system_name, task_name, algo_name) + # plot_all_logs(system_name, task_name, algo_name) + # plot_model_comparisons(system_name, task_name, algo_name, extract_magnitude_of_corrections) + # plot_model_comparisons(system_name, task_name, algo_name, extract_max_correction) + # plot_model_comparisons(system_name, task_name, algo_name, extract_roc_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_roc_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_rmse_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_rmse_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_constraint_violations_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_constraint_violations_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_number_of_corrections) + # plot_model_comparisons(system_name, task_name, algo_name, extract_length_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_length_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_reward_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_reward_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_failed_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_failed_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_feasible_iterations) diff --git a/experiments/mpsc/train_model.sh b/experiments/mpsc/train_model.sh new file mode 100755 index 000000000..5fd1696b0 --- /dev/null +++ b/experiments/mpsc/train_model.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +SYS='quadrotor_2D_attitude' +TASK='tracking' +ALGO='ppo' + +SAFETY_FILTER='mpsc_acados' +MPSC_COST='one_step_cost' +FILTER=True +SF_PEN=0.03 + +if [ "$FILTER" == 'True' ]; then + TAG=mpsf +else + TAG=none +fi + +# Train the unsafe controller/agent. 
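+# With FILTER=True, training actions are passed through the MPSC safety filter and the
+# filtered/unfiltered action difference is penalized (sf_penalty); the model is saved under
+# the 'mpsf' tag. With FILTER=False, a plain unfiltered agent is trained under the 'none' tag.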
+python3 train_rl.py \ + --task quadrotor \ + --algo ${ALGO} \ + --safety_filter ${SAFETY_FILTER} \ + --overrides \ + ./config_overrides/${SYS}_${TASK}.yaml \ + ./config_overrides/${ALGO}_${SYS}.yaml \ + ./config_overrides/${SAFETY_FILTER}_${SYS}.yaml \ + --output_dir ./models/rl_models/${TAG}/ \ + --seed 2 \ + --kv_overrides \ + sf_config.cost_function=${MPSC_COST} \ + sf_config.soften_constraints=True \ + algo_config.filter_train_actions=${FILTER} \ + algo_config.use_safe_reset=False \ + algo_config.penalize_sf_diff=${FILTER} \ + algo_config.sf_penalty=${SF_PEN} \ diff --git a/experiments/mpsc/train_rl.py b/experiments/mpsc/train_rl.py new file mode 100644 index 000000000..a3f79014a --- /dev/null +++ b/experiments/mpsc/train_rl.py @@ -0,0 +1,94 @@ +'''Template training/plotting/testing script.''' + +import os +import shutil +import time +from functools import partial + +import munch +import yaml + +from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function +from safe_control_gym.utils.configuration import ConfigFactory +from safe_control_gym.utils.plotting import plot_from_logs +from safe_control_gym.utils.registration import make +from safe_control_gym.utils.utils import mkdirs, set_device_from_config, set_seed_from_config + + +def train(): + '''Training template. + + TODO: Add restore functionality + ''' + # Create the configuration dictionary. + fac = ConfigFactory() + config = fac.merge() + config.algo_config['training'] = True + + shutil.rmtree(config.output_dir, ignore_errors=True) + + system = 'quadrotor_2D_attitude' + + set_seed_from_config(config) + set_device_from_config(config) + + # Define function to create task/env. + env_func = partial(make, + config.task, + output_dir=config.output_dir, + **config.task_config + ) + + # Create the controller/control_agent. + ctrl = make(config.algo, + env_func, + checkpoint_path=os.path.join(config.output_dir, 'model_latest.pt'), + output_dir=config.output_dir, + **config.algo_config) + ctrl.reset() + + # Setup MPSC. + if config.algo in ['ppo', 'sac']: + safety_filter = make(config.safety_filter, + env_func, + **config.sf_config) + safety_filter.reset() + + if config.sf_config.cost_function == Cost_Function.PRECOMPUTED_COST: + safety_filter.cost_function.uncertified_controller = ctrl + safety_filter.cost_function.output_dir = '.' + + safety_filter.load(path=f'./models/mpsc_parameters/{config.safety_filter}_{system}.pkl') + + ctrl.safety_filter = safety_filter + + # Training. + start_time = time.time() + ctrl.learn() + config['logging'] = {'total_learning_time': time.time() - start_time} + ctrl.close() + print('Training done.') + + with open(os.path.join(config.output_dir, 'config.yaml'), 'w', encoding='UTF-8') as file: + yaml.dump(munch.unmunchify(config), file, default_flow_style=False) + + make_plots(config) + + +def make_plots(config): + '''Produces plots for logged stats during training. + Usage + * use with `--func plot` and `--restore {dir_path}` where `dir_path` is + the experiment folder containing the logs. + * save figures under `dir_path/plots/`. + ''' + # Define source and target log locations. 
+ log_dir = os.path.join(config.output_dir, 'logs') + plot_dir = os.path.join(config.output_dir, 'plots') + mkdirs(plot_dir) + plot_from_logs(log_dir, plot_dir, window=3) + print('Plotting done.') + + +if __name__ == '__main__': + train() diff --git a/safe_control_gym/controllers/ppo/ppo.py b/safe_control_gym/controllers/ppo/ppo.py index 82579603e..c70fc5b0d 100644 --- a/safe_control_gym/controllers/ppo/ppo.py +++ b/safe_control_gym/controllers/ppo/ppo.py @@ -1,4 +1,4 @@ -"""Proximal Policy Optimization (PPO) +'''Proximal Policy Optimization (PPO) Based on: * https://github.com/openai/spinningup/blob/master/spinup/algos/pytorch/ppo/ppo.py @@ -10,7 +10,7 @@ * pytorch-a2c-ppo-acktr-gail - https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail * openai spinning up - ppo - https://github.com/openai/spinningup/tree/master/spinup/algos/pytorch/ppo * stable baselines3 - ppo - https://github.com/DLR-RM/stable-baselines3/tree/master/stable_baselines3/ppo -""" +''' import os import time @@ -30,7 +30,7 @@ class PPO(BaseController): - """Proximal policy optimization.""" + '''Proximal policy optimization.''' def __init__(self, env_func, @@ -40,6 +40,10 @@ def __init__(self, use_gpu=False, seed=0, **kwargs): + self.filter_train_actions = False + self.penalize_sf_diff = False + self.sf_penalty = 1 + self.use_safe_reset = False super().__init__(env_func, training, checkpoint_path, output_dir, use_gpu, seed, **kwargs) # Task. @@ -49,6 +53,7 @@ def __init__(self, self.env = VecRecordEpisodeStatistics(self.env, self.deque_size) self.eval_env = env_func(seed=seed * 111) self.eval_env = RecordEpisodeStatistics(self.eval_env, self.deque_size) + self.model = self.get_prior(self.eval_env, self.prior_info) else: # Testing only. self.env = env_func() @@ -84,8 +89,11 @@ def __init__(self, use_tensorboard = False self.logger = ExperimentLogger(output_dir, log_file_out=log_file_out, use_tensorboard=use_tensorboard) + # Adding safety filter + self.safety_filter = None + def reset(self): - """Do initializations for training or evaluation.""" + '''Do initializations for training or evaluation.''' if self.training: # set up stats tracking self.env.add_tracker('constraint_violation', 0) @@ -94,7 +102,9 @@ def reset(self): self.eval_env.add_tracker('mse', 0, mode='queue') self.total_steps = 0 - obs, _ = self.env.reset() + obs, info = self.env_reset(self.env, self.use_safe_reset) + self.info = info['n'][0] + self.true_obs = obs self.obs = self.obs_normalizer(obs) else: # Add episodic stats to be tracked. @@ -103,16 +113,16 @@ def reset(self): self.env.add_tracker('mse', 0, mode='queue') def close(self): - """Shuts down and cleans up lingering resources.""" + '''Shuts down and cleans up lingering resources.''' self.env.close() if self.training: self.eval_env.close() self.logger.close() def save(self, - path + path, ): - """Saves model params and experiment state to checkpoint path.""" + '''Saves model params and experiment state to checkpoint path.''' path_dir = os.path.dirname(path) os.makedirs(path_dir, exist_ok=True) state_dict = { @@ -131,10 +141,10 @@ def save(self, torch.save(state_dict, path) def load(self, - path + path, ): - """Restores model and experiment given checkpoint path.""" - state = torch.load(path) + '''Restores model and experiment given checkpoint path.''' + state = torch.load(path, map_location=torch.device('cpu')) # Restore policy. 
self.agent.load_state_dict(state['agent']) self.obs_normalizer.load_state_dict(state['obs_normalizer']) @@ -151,45 +161,26 @@ def learn(self, env=None, **kwargs ): - """Performs learning (pre-training, training, fine-tuning, etc.).""" - - # Initial Evaluation. - eval_results = self.run(env=self.eval_env, n_episodes=self.eval_batch_size) - self.logger.info('Eval | ep_lengths {:.2f} +/- {:.2f} | ep_return {:.3f} +/- {:.3f}'.format( - eval_results['ep_lengths'].mean(), - eval_results['ep_lengths'].std(), - eval_results['ep_returns'].mean(), - eval_results['ep_returns'].std())) - - if self.num_checkpoints > 0: - step_interval = np.linspace(0, self.max_env_steps, self.num_checkpoints) - interval_save = np.zeros_like(step_interval, dtype=bool) + '''Performs learning (pre-training, training, fine-tuning, etc).''' while self.total_steps < self.max_env_steps: results = self.train_step() # Checkpoint. - if (self.total_steps >= self.max_env_steps - or (self.save_interval and self.total_steps % self.save_interval == 0)): + if self.total_steps >= self.max_env_steps or (self.save_interval and self.total_steps % self.save_interval == 0): # Latest/final checkpoint. self.save(self.checkpoint_path) self.logger.info(f'Checkpoint | {self.checkpoint_path}') - path = os.path.join(self.output_dir, 'checkpoints', 'model_{}.pt'.format(self.total_steps)) + if self.num_checkpoints and self.total_steps % (self.max_env_steps // self.num_checkpoints) == 0: + # Intermediate checkpoint. + path = os.path.join(self.output_dir, 'checkpoints', f'model_{self.total_steps}.pt') self.save(path) - if self.num_checkpoints > 0: - interval_id = np.argmin(np.abs(np.array(step_interval) - self.total_steps)) - if interval_save[interval_id] is False: - # Intermediate checkpoint. - path = os.path.join(self.output_dir, 'checkpoints', f'model_{self.total_steps}.pt') - self.save(path) - interval_save[interval_id] = True # Evaluation. if self.eval_interval and self.total_steps % self.eval_interval == 0: eval_results = self.run(env=self.eval_env, n_episodes=self.eval_batch_size) results['eval'] = eval_results - self.logger.info('Eval | ep_lengths {:.2f} +/- {:.2f} | ep_return {:.3f} +/- {:.3f}'.format( - eval_results['ep_lengths'].mean(), - eval_results['ep_lengths'].std(), - eval_results['ep_returns'].mean(), - eval_results['ep_returns'].std())) + self.logger.info('Eval | ep_lengths {:.2f} +/- {:.2f} | ep_return {:.3f} +/- {:.3f}'.format(eval_results['ep_lengths'].mean(), + eval_results['ep_lengths'].std(), + eval_results['ep_returns'].mean(), + eval_results['ep_returns'].std())) # Save best model. eval_score = eval_results['ep_returns'].mean() eval_best_score = getattr(self, 'eval_best_score', -np.infty) @@ -200,8 +191,8 @@ def learn(self, if self.log_interval and self.total_steps % self.log_interval == 0: self.log_step(results) - def select_action(self, obs, info=None, extra_info=False): - """Determine the action to take at the current timestep. + def select_action(self, obs, info=None): + '''Determine the action to take at the current timestep. Args: obs (ndarray): The observation at this timestep. @@ -209,68 +200,21 @@ def select_action(self, obs, info=None, extra_info=False): Returns: action (ndarray): The action chosen by the controller. 
- """ + ''' with torch.no_grad(): obs = torch.FloatTensor(obs).to(self.device) - action, v, logp = self.agent.ac.act(obs, True) - if extra_info: - return action, v, logp - return action + action = self.agent.ac.act(obs) - def train_step(self): - """Performs a training/fine-tuning step.""" - self.agent.train() - self.obs_normalizer.unset_read_only() - rollouts = PPOBuffer(self.env.observation_space, self.env.action_space, self.rollout_steps, self.rollout_batch_size) - obs = self.obs - start = time.time() - for _ in range(self.rollout_steps): - with torch.no_grad(): - act, v, logp = self.agent.ac.step(torch.FloatTensor(obs).to(self.device)) - next_obs, rew, done, info = self.env.step(act) - next_obs = self.obs_normalizer(next_obs) - rew = self.reward_normalizer(rew, done) - mask = 1 - done.astype(float) - # Time truncation is not the same as true termination. - terminal_v = np.zeros_like(v) - for idx, inf in enumerate(info['n']): - if 'terminal_info' not in inf: - continue - inff = inf['terminal_info'] - if 'TimeLimit.truncated' in inff and inff['TimeLimit.truncated']: - terminal_obs = inf['terminal_observation'] - terminal_obs_tensor = torch.FloatTensor(terminal_obs).unsqueeze(0).to(self.device) - terminal_val = self.agent.ac.critic(terminal_obs_tensor).squeeze().detach().cpu().numpy() - terminal_v[idx] = terminal_val - rollouts.push({'obs': obs, 'act': act, 'rew': rew, 'mask': mask, 'v': v, 'logp': logp, 'terminal_v': terminal_v}) - obs = next_obs - self.obs = obs - self.total_steps += self.rollout_batch_size * self.rollout_steps - # Learn from rollout batch. - last_val = self.agent.ac.critic(torch.FloatTensor(obs).to(self.device)).detach().cpu().numpy() - ret, adv = compute_returns_and_advantages(rollouts.rew, - rollouts.v, - rollouts.mask, - rollouts.terminal_v, - last_val, - gamma=self.gamma, - use_gae=self.use_gae, - gae_lambda=self.gae_lambda) - rollouts.ret = ret - # Prevent divide-by-0 for repetitive tasks. 
- rollouts.adv = (adv - adv.mean()) / (adv.std() + 1e-6) - results = self.agent.update(rollouts, self.device) - results.update({'step': self.total_steps, 'elapsed_time': time.time() - start}) - return results + return action def run(self, env=None, render=False, - n_episodes=1, + n_episodes=10, verbose=False, ): - """Runs evaluation with current policy.""" + '''Runs evaluation with current policy.''' self.agent.eval() self.obs_normalizer.set_read_only() if env is None: @@ -283,15 +227,30 @@ def run(self, env.add_tracker('constraint_values', 0, mode='queue') env.add_tracker('mse', 0, mode='queue') - obs, info = env.reset() + obs, info = self.env_reset(env, True) + true_obs = obs obs = self.obs_normalizer(obs) ep_returns, ep_lengths = [], [] frames = [] - mse, ep_rmse = [], [] + total_return = 0 + start = time.time() while len(ep_returns) < n_episodes: action = self.select_action(obs=obs, info=info) - obs, _, done, info = env.step(action) - mse.append(info['mse']) + + # Adding safety filter + success = False + physical_action = env.denormalize_action(action) + unextended_obs = np.squeeze(true_obs)[:env.symbolic.nx] + certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) + if success: + action = env.normalize_action(certified_action) + else: + self.safety_filter.ocp_solver.reset() + + action = np.atleast_2d(np.squeeze([action])) + obs, rew, done, info = env.step(action) + total_return += rew + if render: env.render() frames.append(env.render('rgb_array')) @@ -299,18 +258,20 @@ def run(self, print(f'obs {obs} | act {action}') if done: assert 'episode' in info - ep_rmse.append(np.array(mse).mean()**0.5) - mse = [] - ep_returns.append(info['episode']['r']) + ep_returns.append(total_return) ep_lengths.append(info['episode']['l']) - obs, _ = env.reset() + obs, info = self.env_reset(env, True) + total_return = 0 + true_obs = obs obs = self.obs_normalizer(obs) # Collect evaluation results. ep_lengths = np.asarray(ep_lengths) ep_returns = np.asarray(ep_returns) - eval_results = {'ep_returns': ep_returns, 'ep_lengths': ep_lengths, - 'rmse': np.array(ep_rmse).mean(), - 'rmse_std': np.array(ep_rmse).std()} + eval_results = { + 'ep_returns': ep_returns, + 'ep_lengths': ep_lengths, + 'elapsed_time': time.time() - start + } if len(frames) > 0: eval_results['frames'] = frames # Other episodic stats from evaluation env. 
@@ -319,20 +280,95 @@ def run(self, eval_results.update(queued_stats) return eval_results + def train_step(self): + '''Performs a training/fine-tuning step.''' + self.agent.train() + self.obs_normalizer.unset_read_only() + rollouts = PPOBuffer(self.env.observation_space, self.env.action_space, self.rollout_steps, self.rollout_batch_size) + obs = self.obs + true_obs = self.true_obs + info = self.info + start = time.time() + for _ in range(self.rollout_steps): + with torch.no_grad(): + action, v, logp = self.agent.ac.step(torch.FloatTensor(obs).to(self.device)) + unsafe_action = action + + # Adding safety filter + success = False + if self.safety_filter is not None and (self.filter_train_actions is True or self.penalize_sf_diff is True): + physical_action = self.env.envs[0].denormalize_action(action) + unextended_obs = np.squeeze(true_obs)[:self.env.envs[0].symbolic.nx] + certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) + if success and self.filter_train_actions is True: + action = self.env.envs[0].normalize_action(certified_action) + else: + self.safety_filter.ocp_solver.reset() + + action = np.atleast_2d(np.squeeze([action])) + next_obs, rew, done, info = self.env.step(action) + if done[0] and self.use_safe_reset: + next_obs, info = self.env_reset(self.env, self.use_safe_reset) + if self.penalize_sf_diff and success: + rew = np.log(rew) + rew -= self.sf_penalty * np.linalg.norm(physical_action - certified_action) + rew = np.exp(rew) + next_true_obs = next_obs + next_obs = self.obs_normalizer(next_obs) + rew = self.reward_normalizer(rew, done) + mask = 1 - done.astype(float) + # Time truncation is not the same as true termination. + terminal_v = np.zeros_like(v) + for idx, inf in enumerate(info['n']): + if 'terminal_info' not in inf: + continue + inff = inf['terminal_info'] + if 'TimeLimit.truncated' in inff and inff['TimeLimit.truncated']: + terminal_obs = inf['terminal_observation'] + terminal_obs_tensor = torch.FloatTensor(terminal_obs).unsqueeze(0).to(self.device) + terminal_val = self.agent.ac.critic(terminal_obs_tensor).squeeze().detach().cpu().numpy() + terminal_v[idx] = terminal_val + + rollouts.push({'obs': obs, 'act': unsafe_action, 'rew': rew, 'mask': mask, 'v': v, 'logp': logp, 'terminal_v': terminal_v}) + obs = next_obs + true_obs = next_true_obs + info = info['n'][0] + self.obs = obs + self.true_obs = true_obs + self.info = info + self.total_steps += self.rollout_batch_size * self.rollout_steps + # Learn from rollout batch. + last_val = self.agent.ac.critic(torch.FloatTensor(obs).to(self.device)).detach().cpu().numpy() + ret, adv = compute_returns_and_advantages(rollouts.rew, + rollouts.v, + rollouts.mask, + rollouts.terminal_v, + last_val, + gamma=self.gamma, + use_gae=self.use_gae, + gae_lambda=self.gae_lambda) + rollouts.ret = ret + # Prevent divide-by-0 for repetitive tasks. + rollouts.adv = (adv - adv.mean()) / (adv.std() + 1e-6) + results = self.agent.update(rollouts, self.device) + results.update({'step': self.total_steps, 'elapsed_time': time.time() - start}) + return results + def log_step(self, results ): - """Does logging after a training step.""" + '''Does logging after a training step.''' step = results['step'] # runner stats self.logger.add_scalars( { 'step': step, - 'step_time': results['elapsed_time'], - 'progress': step / self.max_env_steps + 'progress': step / self.max_env_steps, }, step, - prefix='time') + prefix='time', + write=False, + write_tb=False) # Learning stats. 
self.logger.add_scalars( { @@ -341,44 +377,68 @@ def log_step(self, }, step, prefix='loss') - - try: - # Performance stats. - ep_lengths = np.asarray(self.env.length_queue) - ep_returns = np.asarray(self.env.return_queue) - ep_constraint_violation = np.asarray(self.env.queued_stats['constraint_violation']) + + # Performance stats. + ep_lengths = np.asarray(self.env.length_queue) + ep_returns = np.asarray(self.env.return_queue) + ep_constraint_violation = np.asarray(self.env.queued_stats['constraint_violation']) + self.logger.add_scalars( + { + 'ep_length': ep_lengths.mean(), + 'ep_return': ep_returns.mean(), + 'ep_reward': (ep_returns / ep_lengths).mean(), + 'ep_constraint_violation': ep_constraint_violation.mean(), + 'step_time': results['elapsed_time'], + }, + step, + prefix='stat') + # Total constraint violation during learning. + total_violations = self.env.accumulated_stats['constraint_violation'] + self.logger.add_scalars({'constraint_violation': total_violations}, step, prefix='stat') + if 'eval' in results: + eval_ep_lengths = results['eval']['ep_lengths'] + eval_ep_returns = results['eval']['ep_returns'] + eval_constraint_violation = results['eval']['constraint_violation'] + eval_mse = results['eval']['mse'] self.logger.add_scalars( { - 'ep_length': ep_lengths.mean(), - 'ep_return': ep_returns.mean(), - 'ep_return_std': ep_returns.std(), - 'ep_reward': (ep_returns / ep_lengths).mean(), - 'ep_constraint_violation': ep_constraint_violation.mean() + 'ep_length': eval_ep_lengths.mean(), + 'ep_return': eval_ep_returns.mean(), + 'ep_reward': (eval_ep_returns / eval_ep_lengths).mean(), + 'constraint_violation': eval_constraint_violation.mean(), + 'mse': eval_mse.mean(), + 'step_time': results['eval']['elapsed_time'], }, step, - prefix='stat') - # Total constraint violation during learning. - total_violations = self.env.accumulated_stats['constraint_violation'] - self.logger.add_scalars({'constraint_violation': total_violations}, step, prefix='stat') - if 'eval' in results: - eval_ep_lengths = results['eval']['ep_lengths'] - eval_ep_returns = results['eval']['ep_returns'] - eval_constraint_violation = results['eval']['constraint_violation'] - eval_rmse = results['eval']['rmse'] - eval_rmse_std = results['eval']['rmse_std'] - self.logger.add_scalars( - { - 'ep_length': eval_ep_lengths.mean(), - 'ep_return': eval_ep_returns.mean(), - 'ep_return_std': eval_ep_returns.std(), - 'ep_reward': (eval_ep_returns / eval_ep_lengths).mean(), - 'constraint_violation': eval_constraint_violation.mean(), - 'rmse': eval_rmse, - 'rmse_std': eval_rmse_std - }, - step, - prefix='stat_eval') - except: - pass + prefix='stat_eval') # Print summary table self.logger.dump_scalars() + + def env_reset(self, env, use_safe_reset): + '''Resets the environment until a feasible initial state is found. + + Args: + env (BenchmarkEnv): The environment that is being reset. + use_safe_reset (bool): Whether to safely reset the system using the SF. + + Returns: + obs (ndarray): The initial observation. + info (dict): The initial info. 
+ ''' + success = False + action = self.model.U_EQ + obs, info = env.reset() + if self.safety_filter is not None: + self.safety_filter.reset_before_run() + + if use_safe_reset is True and self.safety_filter is not None: + while success is not True or np.any(self.safety_filter.slack_prev > 1e-4): + obs, info = env.reset() + info['current_step'] = 1 + unextended_obs = np.squeeze(obs)[:self.env.envs[0].symbolic.nx] + self.safety_filter.reset_before_run() + _, success = self.safety_filter.certify_action(unextended_obs, action, info) + if not success: + self.safety_filter.ocp_solver.reset() + + return obs, info diff --git a/safe_control_gym/envs/constraints.py b/safe_control_gym/envs/constraints.py index d7ff6fadd..3c98aaedd 100644 --- a/safe_control_gym/envs/constraints.py +++ b/safe_control_gym/envs/constraints.py @@ -106,7 +106,7 @@ def get_value(self, value (ndarray): The evaluation of the constraint. ''' env_value = self.get_env_constraint_var(env) - return np.round_(np.atleast_1d(np.squeeze(self.sym_func(np.array(env_value, ndmin=1)))), decimals=self.decimals) + return np.round(np.atleast_1d(np.squeeze(self.sym_func(np.array(env_value, ndmin=1)))), decimals=self.decimals) def is_violated(self, env, @@ -443,7 +443,7 @@ def __init__(self, self.num_constraints = self.bound.shape[0] def get_value(self, env): - c_value = np.round_(np.abs(self.constraint_filter @ env.state) - self.bound, decimals=self.decimals) + c_value = np.round(np.abs(self.constraint_filter @ env.state) - self.bound, decimals=self.decimals) return c_value # TODO: temp addition diff --git a/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py b/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py index 5a0442a90..11e5ade6d 100644 --- a/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py +++ b/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py @@ -14,11 +14,10 @@ from datetime import datetime from enum import Enum +import casadi as cs import numpy as np import pybullet as p import pybullet_data -import casadi as cs -from termcolor import colored from safe_control_gym.envs.benchmark_env import BenchmarkEnv from safe_control_gym.math_and_models.transformations import csRotXYZ, get_angularvelocity_rpy @@ -95,26 +94,26 @@ def __init__(self, self.RECORD = record # Load the drone properties from the .urdf file. self.MASS, \ - self.L, \ - self.THRUST2WEIGHT_RATIO, \ - self.J, \ - self.J_INV, \ - self.KF, \ - self.KM, \ - self.COLLISION_H, \ - self.COLLISION_R, \ - self.COLLISION_Z_OFFSET, \ - self.MAX_SPEED_KMH, \ - self.GND_EFF_COEFF, \ - self.PROP_RADIUS, \ - self.DRAG_COEFF, \ - self.DW_COEFF_1, \ - self.DW_COEFF_2, \ - self.DW_COEFF_3, \ - self.PWM2RPM_SCALE, \ - self.PWM2RPM_CONST, \ - self.MIN_PWM, \ - self.MAX_PWM = self._parse_urdf_parameters(self.URDF_PATH) + self.L, \ + self.THRUST2WEIGHT_RATIO, \ + self.J, \ + self.J_INV, \ + self.KF, \ + self.KM, \ + self.COLLISION_H, \ + self.COLLISION_R, \ + self.COLLISION_Z_OFFSET, \ + self.MAX_SPEED_KMH, \ + self.GND_EFF_COEFF, \ + self.PROP_RADIUS, \ + self.DRAG_COEFF, \ + self.DW_COEFF_1, \ + self.DW_COEFF_2, \ + self.DW_COEFF_3, \ + self.PWM2RPM_SCALE, \ + self.PWM2RPM_CONST, \ + self.MIN_PWM, \ + self.MAX_PWM = self._parse_urdf_parameters(self.URDF_PATH) self.GROUND_PLANE_Z = -0.05 if verbose: print( @@ -273,8 +272,6 @@ def _advance_simulation(self, clipped_action, disturbance_force=None): `_preprocess_action()` in each subclass. disturbance_force (ndarray, optional): Disturbance force, applied to all drones. 
''' - time_before_stepping = time.time() - # clipped_action = np.reshape(clipped_action, (self.NUM_DRONES, 4)) clipped_action = np.expand_dims(clipped_action, axis=0) # Repeat for as many as the aggregate physics steps. @@ -283,7 +280,7 @@ def _advance_simulation(self, clipped_action, disturbance_force=None): # Between aggregate steps for certain types of update. if self.PYB_STEPS_PER_CTRL > 1 and self.PHYSICS in [ Physics.DYN, Physics.PYB_GND, Physics.PYB_DRAG, - Physics.PYB_DW, Physics.PYB_GND_DRAG_DW, Physics.RK4 #, Physics.DYN_2D + Physics.PYB_DW, Physics.PYB_GND_DRAG_DW, Physics.RK4 # , Physics.DYN_2D ]: self._update_and_store_kinematic_information() # Step the simulation using the desired physics update. @@ -321,9 +318,9 @@ def _advance_simulation(self, clipped_action, disturbance_force=None): if disturbance_force is not None: pos = self._get_drone_state_vector(i)[:3] ''' - NOTE: applyExternalForce only works when explicitly + NOTE: applyExternalForce only works when explicitly stepping the simulation with p.stepSimulation(). - Therefore, + Therefore, ''' p.applyExternalForce( self.DRONE_IDS[i], @@ -333,12 +330,12 @@ def _advance_simulation(self, clipped_action, disturbance_force=None): flags=p.WORLD_FRAME, physicsClientId=self.PYB_CLIENT) # PyBullet computes the new state, unless Physics.DYN. - if self.PHYSICS not in [Physics.DYN, Physics.RK4, Physics.DYN_2D, Physics.DYN_SI, + if self.PHYSICS not in [Physics.DYN, Physics.RK4, Physics.DYN_2D, Physics.DYN_SI, Physics.DYN_SI_3D, Physics.DYN_SI_3D_10]: p.stepSimulation(physicsClientId=self.PYB_CLIENT) # Save the last applied action (e.g. to compute drag). self.last_clipped_action = clipped_action - if self.PHYSICS in [Physics.DYN_2D, Physics.DYN_SI, + if self.PHYSICS in [Physics.DYN_2D, Physics.DYN_SI, Physics.DYN_SI_3D, Physics.DYN_SI_3D_10]: # set the state of the drone after stepping with the analytical model self._set_pybullet_information() @@ -491,7 +488,7 @@ def _ground_effect(self, rpm, nth_drone): ]) prop_heights = np.clip(prop_heights, self.GND_EFF_H_CLIP, np.inf) gnd_effects = np.array(rpm ** 2) * self.KF * self.GND_EFF_COEFF \ - * (self.PROP_RADIUS / (4 * prop_heights)) ** 2 + * (self.PROP_RADIUS / (4 * prop_heights)) ** 2 if np.abs(self.rpy[nth_drone, 0]) < np.pi / 2 and np.abs( self.rpy[nth_drone, 1]) < np.pi / 2: for i in range(4): @@ -649,7 +646,6 @@ def _dynamics_rk4(self, rpm, nth_drone): self.rpy_rates[nth_drone, :] = rpy_rates def setup_rk4_dynamics_expression(self): - nx, nu = 12, 4 gamma = self.KM / self.KF z = cs.MX.sym('z') z_dot = cs.MX.sym('z_dot') @@ -666,7 +662,7 @@ def setup_rk4_dynamics_expression(self): # PyBullet Euler angles use the SDFormat for rotation matrices. Rob = csRotXYZ(phi, theta, psi) # rotation matrix transforming a vector in the body frame to the world frame. - # Define state variables. + # Define state variables. X = cs.vertcat(x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p_body, q_body, r_body) # Define inputs. 
@@ -689,14 +685,14 @@ def setup_rk4_dynamics_expression(self): self.L / cs.sqrt(2.0) * (-f1 + f2 + f3 - f4), gamma * (-f1 + f2 - f3 + f4)) rate_dot = self.J_INV @ ( - Mb - (cs.skew(cs.vertcat(p_body, q_body, r_body)) @ self.J @ cs.vertcat(p_body, q_body, r_body))) + Mb - (cs.skew(cs.vertcat(p_body, q_body, r_body)) @ self.J @ cs.vertcat(p_body, q_body, r_body))) ang_dot = cs.blockcat([[1, cs.sin(phi) * cs.tan(theta), cs.cos(phi) * cs.tan(theta)], [0, cs.cos(phi), -cs.sin(phi)], [0, cs.sin(phi) / cs.cos(theta), cs.cos(phi) / cs.cos(theta)]]) @ cs.vertcat(p_body, q_body, r_body) X_dot = cs.vertcat(pos_dot[0], pos_ddot[0], pos_dot[1], pos_ddot[1], pos_dot[2], pos_ddot[2], ang_dot, rate_dot) - self.X_dot_fun = cs.Function("X_dot", [X, U], [X_dot]) + self.X_dot_fun = cs.Function('X_dot', [X, U], [X_dot]) self.fd_func = cs.integrator('fd', 'rk', {'x': X, 'p': U, 'ode': X_dot}, {'tf': self.PYB_TIMESTEP}) @@ -715,7 +711,6 @@ def _dynamics_2d(self, rpm, nth_drone): rpy = self.rpy[nth_drone, :] vel = self.vel[nth_drone, :] ang_v = self.ang_v[nth_drone, :] - rpy_rates = self.rpy_rates[nth_drone, :] # rotation = np.array(p.getMatrixFromQuaternion(quat)).reshape(3, 3) # Compute forces and torques. @@ -728,7 +723,7 @@ def _dynamics_2d(self, rpm, nth_drone): # update state with RK4 # next_state = self.fd_func(x0=state, p=input)['xf'].full()[:, 0] X_dot = self.X_dot_fun(state, action).full()[:, 0] - next_state = state + X_dot*self.PYB_TIMESTEP + next_state = state + X_dot * self.PYB_TIMESTEP # Updated information pos = np.array([next_state[0], 0, next_state[4]]) @@ -794,13 +789,13 @@ def setup_dynamics_2d_expression(self): self.L / cs.sqrt(2.0) * (-f1 + f2 + f3 - f4), gamma * (-f1 + f2 - f3 + f4)) rate_dot = self.J_INV @ ( - Mb - (cs.skew(cs.vertcat(p_body, q_body, r_body)) @ self.J @ cs.vertcat(p_body, q_body, r_body))) + Mb - (cs.skew(cs.vertcat(p_body, q_body, r_body)) @ self.J @ cs.vertcat(p_body, q_body, r_body))) ang_dot = (cs.blockcat([[1, cs.sin(phi) * cs.tan(theta), cs.cos(phi) * cs.tan(theta)], [0, cs.cos(phi), -cs.sin(phi)], [0, cs.sin(phi) / cs.cos(theta), cs.cos(phi) / cs.cos(theta)]]) @ cs.vertcat(p_body, q_body, r_body)) X_dot = cs.vertcat(pos_dot[0], pos_ddot[0], pos_dot[1], pos_ddot[1], pos_dot[2], pos_ddot[2], ang_dot, rate_dot) - self.X_dot_fun = cs.Function("X_dot", [X, U], [X_dot]) + self.X_dot_fun = cs.Function('X_dot', [X, U], [X_dot]) def _dynamics_si(self, action, nth_drone, disturbance_force=None): '''Explicit dynamics implementation from the identified model. @@ -817,7 +812,6 @@ def _dynamics_si(self, action, nth_drone, disturbance_force=None): # quat = self.quat[nth_drone, :] rpy = self.rpy[nth_drone, :] vel = self.vel[nth_drone, :] - ang_v = self.ang_v[nth_drone, :] rpy_rates = self.rpy_rates[nth_drone, :] # Compute forces and torques. 
@@ -827,7 +821,7 @@ def _dynamics_si(self, action, nth_drone, disturbance_force=None): # update state if disturbance_force is not None: d = np.array([disturbance_force[0], disturbance_force[2]]) - else: + else: d = np.array([0, 0]) # perform euler integration # next_state = state + self.PYB_TIMESTEP * self.X_dot_fun(state, action, d).full()[:, 0] @@ -836,7 +830,7 @@ def _dynamics_si(self, action, nth_drone, disturbance_force=None): k2 = self.X_dot_fun(state + 0.5 * self.PYB_TIMESTEP * k1, action, d).full()[:, 0] k3 = self.X_dot_fun(state + 0.5 * self.PYB_TIMESTEP * k2, action, d).full()[:, 0] k4 = self.X_dot_fun(state + self.PYB_TIMESTEP * k3, action, d).full()[:, 0] - next_state = state + (self.PYB_TIMESTEP / 6) * (k1 + 2*k2 + 2*k3 + k4) + next_state = state + (self.PYB_TIMESTEP / 6) * (k1 + 2 * k2 + 2 * k3 + k4) # Updated information pos = np.array([next_state[0], 0, next_state[2]]) @@ -860,19 +854,19 @@ def setup_dynamics_si_expression(self): theta_dot = cs.MX.sym('theta_dot') # Pitch X = cs.vertcat(x, x_dot, z, z_dot, theta, theta_dot) g = self.GRAVITY_ACC - d = cs.MX.sym('d', 2, 1) # disturbance force + d = cs.MX.sym('d', 2, 1) # disturbance force # Define inputs. - T = cs.MX.sym('T') # normlized thrust [N] + T = cs.MX.sym('T') # normlized thrust [N] P = cs.MX.sym('P') # desired pitch angle [rad] U = cs.vertcat(T, P) X_dot = cs.vertcat(x_dot, - (18.112984649321753 * T+ 3.6800) * cs.sin(theta) + -0.008 + d[0] / self.MASS, - z_dot, - (18.112984649321753 * T + 3.6800) * cs.cos(theta) - g + d[1] / self.MASS, - theta_dot, - -140.8 * theta - 13.4 * theta_dot + 124.8 * P) - self.X_dot_fun = cs.Function("X_dot", [X, U, d], [X_dot]) + (18.112984649321753 * T + 3.6800) * cs.sin(theta) + -0.008 + d[0] / self.MASS, + z_dot, + (18.112984649321753 * T + 3.6800) * cs.cos(theta) - g + d[1] / self.MASS, + theta_dot, + -140.8 * theta - 13.4 * theta_dot + 124.8 * P) + self.X_dot_fun = cs.Function('X_dot', [X, U, d], [X_dot]) def _dynamics_si_3d(self, action, nth_drone, disturbance_force=None): '''Explicit dynamics implementation from the identified model. @@ -889,7 +883,6 @@ def _dynamics_si_3d(self, action, nth_drone, disturbance_force=None): rpy = self.rpy[nth_drone, :] vel = self.vel[nth_drone, :] ang_v = self.ang_v[nth_drone, :] - rpy_rates = self.rpy_rates[nth_drone, :] # Compute forces and torques. # Update state with discrete time dynamics. 
@@ -973,21 +966,21 @@ def setup_dynamics_si_3d_expression(self): params_pitch_rate = [-99.94, -13.3, 84.73] params_yaw_rate = [0, 0, 0] X_dot = cs.vertcat(x_dot, - (params_acc[0] * T + params_acc[1]) * (cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)) + d[0] / self.MASS, - y_dot, - (params_acc[0] * T + params_acc[1]) * ( - cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), - z_dot, - (params_acc[0] * T + params_acc[1]) * cs.cos(phi) * cs.cos(theta) - g + d[1] / self.MASS, - phi_dot, - theta_dot, - psi_dot, - params_roll_rate[0] * phi + params_roll_rate[1] * phi_dot + params_roll_rate[2] * R, - params_pitch_rate[0] * theta + params_pitch_rate[1] * theta_dot + params_pitch_rate[2] * P, - params_yaw_rate[0] * psi + params_yaw_rate[1] * psi_dot + params_yaw_rate[2] * Y) - - self.X_dot_fun = cs.Function("X_dot", [X, U, d], [X_dot]) - + (params_acc[0] * T + params_acc[1]) * (cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)) + d[0] / self.MASS, + y_dot, + (params_acc[0] * T + params_acc[1]) * ( + cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), + z_dot, + (params_acc[0] * T + params_acc[1]) * cs.cos(phi) * cs.cos(theta) - g + d[1] / self.MASS, + phi_dot, + theta_dot, + psi_dot, + params_roll_rate[0] * phi + params_roll_rate[1] * phi_dot + params_roll_rate[2] * R, + params_pitch_rate[0] * theta + params_pitch_rate[1] * theta_dot + params_pitch_rate[2] * P, + params_yaw_rate[0] * psi + params_yaw_rate[1] * psi_dot + params_yaw_rate[2] * Y) + + self.X_dot_fun = cs.Function('X_dot', [X, U, d], [X_dot]) + def _dynamics_si_3d_10(self, action, nth_drone, disturbance_force=None): '''Explicit dynamics implementation from the identified model. NOTE: The dynamics update is independent of the pybullet simulation. @@ -1003,7 +996,6 @@ def _dynamics_si_3d_10(self, action, nth_drone, disturbance_force=None): rpy = self.rpy[nth_drone, :] vel = self.vel[nth_drone, :] ang_v = self.ang_v[nth_drone, :] - rpy_rates = self.rpy_rates[nth_drone, :] # Compute forces and torques. # Update state with discrete time dynamics. 
@@ -1036,7 +1028,7 @@ def _dynamics_si_3d_10(self, action, nth_drone, disturbance_force=None): self.ang_v[nth_drone, :] = ang_v.copy() def setup_dynamics_si_3d_10_expression(self): - # Casadi states + # Casadi states x = cs.MX.sym('x') y = cs.MX.sym('y') z = cs.MX.sym('z') @@ -1083,19 +1075,19 @@ def setup_dynamics_si_3d_10_expression(self): psi = 0 X_dot = cs.vertcat(x_dot, - (params_acc[0] * T + params_acc[1]) * (cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)) + d[0] / self.MASS, - y_dot, - (params_acc[0] * T + params_acc[1]) * ( - cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), - z_dot, - (params_acc[0] * T + params_acc[1]) * cs.cos(phi) * cs.cos(theta) - g + d[1] / self.MASS, - phi_dot, - theta_dot, - params_roll_rate[0] * phi + params_roll_rate[1] * phi_dot + params_roll_rate[2] * R, - params_pitch_rate[0] * theta + params_pitch_rate[1] * theta_dot + params_pitch_rate[2] * P, + (params_acc[0] * T + params_acc[1]) * (cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)) + d[0] / self.MASS, + y_dot, + (params_acc[0] * T + params_acc[1]) * ( + cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), + z_dot, + (params_acc[0] * T + params_acc[1]) * cs.cos(phi) * cs.cos(theta) - g + d[1] / self.MASS, + phi_dot, + theta_dot, + params_roll_rate[0] * phi + params_roll_rate[1] * phi_dot + params_roll_rate[2] * R, + params_pitch_rate[0] * theta + params_pitch_rate[1] * theta_dot + params_pitch_rate[2] * P, ) - self.X_dot_fun = cs.Function("X_dot", [X, U, d], [X_dot]) + self.X_dot_fun = cs.Function('X_dot', [X, U, d], [X_dot]) def _show_drone_local_axes(self, nth_drone): '''Draws the local frame of the n-th drone in PyBullet's GUI. @@ -1167,5 +1159,5 @@ def _parse_urdf_parameters(self, file_name): MIN_PWM = float(URDF_TREE[0].attrib['pwm_min']) MAX_PWM = float(URDF_TREE[0].attrib['pwm_max']) return M, L, THRUST2WEIGHT_RATIO, J, J_INV, KF, KM, COLLISION_H, COLLISION_R, COLLISION_Z_OFFSET, MAX_SPEED_KMH, \ - GND_EFF_COEFF, PROP_RADIUS, DRAG_COEFF, DW_COEFF_1, DW_COEFF_2, DW_COEFF_3, \ - PWM2RPM_SCALE, PWM2RPM_CONST, MIN_PWM, MAX_PWM \ No newline at end of file + GND_EFF_COEFF, PROP_RADIUS, DRAG_COEFF, DW_COEFF_1, DW_COEFF_2, DW_COEFF_3, \ + PWM2RPM_SCALE, PWM2RPM_CONST, MIN_PWM, MAX_PWM diff --git a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py index 7e4f337cc..6baa35db7 100644 --- a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py +++ b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py @@ -235,7 +235,7 @@ def __init__(self, (self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S and len(info_mse_metric_state_weight) == 5) or \ (self.QUAD_TYPE == QuadType.THREE_D_ATTITUDE and len(info_mse_metric_state_weight) == 12) or \ (self.QUAD_TYPE == QuadType.THREE_D_ATTITUDE_10 and len(info_mse_metric_state_weight) == 10): - + self.info_mse_metric_state_weight = np.array(info_mse_metric_state_weight, ndmin=1, dtype=float) else: raise ValueError('[ERROR] in Quadrotor.__init__(), wrong info_mse_metric_state_weight argument size.') @@ -256,7 +256,7 @@ def __init__(self, QuadType.THREE_D_ATTITUDE: ['init_x', 'init_x_dot', 'init_y', 'init_y_dot', 'init_z', 'init_z_dot', 'init_phi', 'init_theta', 'init_psi', 'init_p', 'init_q', 'init_r'], QuadType.THREE_D_ATTITUDE_10: ['init_x', 'init_x_dot', 'init_y', 'init_y_dot', 'init_z', 'init_z_dot', - 'init_phi', 'init_theta', 'init_p', 'init_q'], + 'init_phi', 'init_theta', 'init_p', 'init_q'], } if init_state is None: for 
init_name in self.INIT_STATE_RAND_INFO: # Default zero state. @@ -283,8 +283,8 @@ def __init__(self, self.INERTIAL_PROP_RAND_INFO.pop('Izz', None) elif self.QUAD_TYPE == QuadType.TWO_D or \ self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or \ - self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S \ - or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_BODY: + self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S or \ + self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_BODY: # Only randomize Iyy for the 2D quadrotor. self.INERTIAL_PROP_RAND_INFO.pop('Ixx', None) self.INERTIAL_PROP_RAND_INFO.pop('Izz', None) @@ -691,7 +691,7 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): # Define observation. Y = cs.vertcat(x, x_dot, z, z_dot, theta, theta_dot) elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE: - # identified parameters for the 2D attitude interface + # identified parameters for the 2D attitude interface # NOTE: these parameters are not set in the prior_prop dict # since they are specific to the 2D attitude model self.beta_1 = prior_prop.get('beta_1', 18.112984649321753) @@ -739,7 +739,7 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): P_mapping = self.alpha_1 * (theta + self.pitch_bias) + self.alpha_2 * theta_dot + self.alpha_3 * P self.T_mapping_func = cs.Function('T_mapping', [T], [T_mapping]) self.P_mapping_func = cs.Function('P_mapping', [theta, theta_dot, P], [P_mapping]) - + elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_BODY: nx, nu = 6, 2 # Define states. @@ -758,18 +758,18 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): # With the formulat F_desired = b_F * T + a_F # Define dynamics equations. - X_dot = cs.vertcat(vx *cs.cos(theta) - vz * cs.sin(theta), + X_dot = cs.vertcat(vx * cs.cos(theta) - vz * cs.sin(theta), vz * theta_dot - g * cs.sin(theta), vx * cs.sin(theta) + vz * cs.cos(theta), - -vx * theta_dot - g * cs.cos(theta) + (beta_1 * T + beta_2), + -vx * theta_dot - g * cs.cos(theta) + (self.beta_1 * T + self.beta_2), -theta_dot, - alpha_1 * (-theta + pitch_bias) + alpha_2 * -theta_dot + alpha_3 * P) + self.alpha_1 * (-theta + self.pitch_bias) + self.alpha_2 * -theta_dot + self.alpha_3 * P) # Define observation. x_dot = vx * cs.cos(theta) + vz * cs.sin(theta) z_dot = -vx * cs.sin(theta) + vz * cs.cos(theta) # Y = cs.vertcat(x, x_dot, z, z_dot, theta, theta_dot) Y = cs.vertcat(x, vx, z, vz, theta, theta_dot) - T_mapping = beta_1 * T + beta_2 + T_mapping = self.beta_1 * T + self.beta_2 self.T_mapping_func = cs.Function('T_mapping', [T], [T_mapping]) elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S: @@ -878,10 +878,10 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): # TODO: create a parameter for the new quad model X_dot = cs.vertcat(x_dot, (params_acc[0] * T + params_acc[1]) * ( - cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)), + cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)), y_dot, (params_acc[0] * T + params_acc[1]) * ( - cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), + cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), z_dot, (params_acc[0] * T + params_acc[1]) * cs.cos(phi) * cs.cos(theta) - g, phi_dot, @@ -892,7 +892,7 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): params_yaw_rate[0] * psi + params_yaw_rate[1] * psi_dot + params_yaw_rate[2] * Y) # Define observation. Y = cs.vertcat(x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, phi_dot, theta_dot, psi_dot) - + elif self.QUAD_TYPE == QuadType.THREE_D_ATTITUDE_10: nx, nu = 10, 3 # Define states. 
@@ -921,17 +921,17 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): # TODO: create a parameter for the new quad model X_dot = cs.vertcat(x_dot, (params_acc[0] * T + params_acc[1]) * ( - cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)), + cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)), y_dot, (params_acc[0] * T + params_acc[1]) * ( - cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), + cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), z_dot, (params_acc[0] * T + params_acc[1]) * cs.cos(phi) * cs.cos(theta) - g, phi_dot, theta_dot, params_roll_rate[0] * phi + params_roll_rate[1] * phi_dot + params_roll_rate[2] * R, params_pitch_rate[0] * theta + params_pitch_rate[1] * theta_dot + params_pitch_rate[2] * P, - ) + ) # Define observation. Y = cs.vertcat(x, x_dot, y, y_dot, z, z_dot, phi, theta, phi_dot, theta_dot) @@ -1068,8 +1068,8 @@ def _set_action_space(self): else: self.hover_thrust = self.GRAVITY_ACC * self.MASS / action_dim - self.action_scale = (self.physical_action_bounds[1]-self.physical_action_bounds[0])/2 - self.action_bias = (self.physical_action_bounds[1]+self.physical_action_bounds[0])/2 + self.action_scale = (self.physical_action_bounds[1] - self.physical_action_bounds[0]) / 2 + self.action_bias = (self.physical_action_bounds[1] + self.physical_action_bounds[0]) / 2 self.action_space = spaces.Box(low=-np.ones(action_dim), high=np.ones(action_dim), dtype=np.float32) @@ -1166,10 +1166,10 @@ def _set_observation_space(self): self.phi_dot_threshold_radians, self.theta_dot_threshold_radians ]) self.STATE_LABELS = ['x', 'x_dot', 'y', 'y_dot', 'z', 'z_dot', - 'phi', 'theta', 'phi_dot', 'theta_dot'] + 'phi', 'theta', 'phi_dot', 'theta_dot'] self.STATE_UNITS = ['m', 'm/s', 'm', 'm/s', 'm', 'm/s', 'rad', 'rad', 'rad/s', 'rad/s'] - + # Define the state space for the dynamics. self.state_space = spaces.Box(low=low, high=high, dtype=np.float32) @@ -1267,7 +1267,7 @@ def normalize_action(self, action): # else: # action = (action / self.hover_thrust - 1) / self.norm_act_scale - action = (action - self.action_bias)/self.action_scale + action = (action - self.action_bias) / self.action_scale return action @@ -1297,7 +1297,7 @@ def denormalize_action(self, action): # else: # action = (1 + self.norm_act_scale * action) * self.hover_thrust - action = action*self.action_scale + self.action_bias + action = action * self.action_scale + self.action_bias # action = np.clip(action, self.action_space.low, self.action_space.high) return action @@ -1498,9 +1498,9 @@ def _get_info(self): # Filter only relevant dimensions. 
state_error = state_error * self.info_mse_metric_state_weight info['mse'] = np.sum(state_error ** 2) - # if self.constraints is not None: - # info['constraint_values'] = self.constraints.get_values(self) - # info['constraint_violations'] = self.constraints.get_violations(self) + if self.constraints is not None: + info['constraint_values'] = self.constraints.get_values(self) + info['constraint_violations'] = self.constraints.get_violations(self) return info def _get_reset_info(self): @@ -1515,4 +1515,4 @@ def _get_reset_info(self): }, 'x_reference': self.X_GOAL, 'u_reference': self.U_GOAL} if self.constraints is not None: info['symbolic_constraints'] = self.constraints.get_all_symbolic_models() - return info \ No newline at end of file + return info diff --git a/safe_control_gym/safety_filters/__init__.py b/safe_control_gym/safety_filters/__init__.py index 2d53e550f..41527f16c 100644 --- a/safe_control_gym/safety_filters/__init__.py +++ b/safe_control_gym/safety_filters/__init__.py @@ -6,6 +6,10 @@ entry_point='safe_control_gym.safety_filters.mpsc.linear_mpsc:LINEAR_MPSC', config_entry_point='safe_control_gym.safety_filters.mpsc:mpsc.yaml') +register(idx='mpsc_acados', + entry_point='safe_control_gym.safety_filters.mpsc.mpsc_acados:MPSC_ACADOS', + config_entry_point='safe_control_gym.safety_filters.mpsc:mpsc.yaml') + register(idx='cbf', entry_point='safe_control_gym.safety_filters.cbf.cbf:CBF', config_entry_point='safe_control_gym.safety_filters.cbf:cbf.yaml') diff --git a/safe_control_gym/safety_filters/mpsc/mpsc.py b/safe_control_gym/safety_filters/mpsc/mpsc.py index 34307fe8e..251c58985 100644 --- a/safe_control_gym/safety_filters/mpsc/mpsc.py +++ b/safe_control_gym/safety_filters/mpsc/mpsc.py @@ -17,6 +17,7 @@ from safe_control_gym.controllers.mpc.mpc_utils import get_cost_weight_matrix, reset_constraints from safe_control_gym.safety_filters.base_safety_filter import BaseSafetyFilter from safe_control_gym.safety_filters.mpsc.mpsc_cost_function.one_step_cost import ONE_STEP_COST +from safe_control_gym.safety_filters.mpsc.mpsc_cost_function.precomputed_cost import PRECOMPUTED_COST from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function, get_trajectory_on_horizon @@ -26,13 +27,16 @@ class MPSC(BaseSafetyFilter, ABC): def __init__(self, env_func, horizon: int = 10, - q_lin: list = None, - r_lin: list = None, + q_mpc: list = None, + r_mpc: list = None, integration_algo: str = 'rk4', warmstart: bool = True, additional_constraints: list = None, use_terminal_set: bool = True, cost_function: Cost_Function = Cost_Function.ONE_STEP_COST, + mpsc_cost_horizon: int = 5, + decay_factor: float = 0.85, + use_acados: bool = False, **kwargs ): '''Initialize the MPSC. @@ -40,13 +44,15 @@ def __init__(self, Args: env_func (partial BenchmarkEnv): Environment for the task. horizon (int): The MPC horizon. - q_lin, r_lin (list): Q and R gain matrices for linear controller. + q_mpc, r_mpc (list): Q and R gain matrices for linear controller. integration_algo (str): The algorithm used for integrating the dynamics, either 'LTI', 'rk4', 'rk', or 'cvodes'. warmstart (bool): If the previous MPC soln should be used to warmstart the next mpc step. additional_constraints (list): List of additional constraints to consider. use_terminal_set (bool): Whether to use a terminal set constraint or not. cost_function (Cost_Function): A string (from Cost_Function) representing the cost function to be used. + mpsc_cost_horizon (int): How many steps forward to check for constraint violations. 
+ decay_factor (float): How much to discount future costs. ''' # Store all params/args. @@ -62,15 +68,14 @@ def __init__(self, self.env = env_func(normalized_rl_action_space=False) self.training_env = env_func(randomized_init=True, init_state=None, - cost='quadratic', normalized_rl_action_space=False, ) # Setup attributes. self.reset() self.dt = self.model.dt - self.Q = get_cost_weight_matrix(q_lin, self.model.nx) - self.R = get_cost_weight_matrix(r_lin, self.model.nu) + self.Q = get_cost_weight_matrix(q_mpc, self.model.nx) + self.R = get_cost_weight_matrix(r_mpc, self.model.nu) self.X_EQ = np.zeros(self.model.nx) self.U_EQ = self.model.U_EQ @@ -79,6 +84,7 @@ def __init__(self, self.lqr_gain = -compute_lqr_gain(self.model, self.X_EQ, self.U_EQ, self.Q, self.R, discrete_dynamics=True) self.terminal_set = None + self.prev_action = self.U_EQ if self.additional_constraints is None: additional_constraints = [] @@ -87,6 +93,10 @@ def __init__(self, if cost_function == Cost_Function.ONE_STEP_COST: self.cost_function = ONE_STEP_COST() + self.mpsc_cost_horizon = 1 + self.cost_function.mpsc_cost_horizon = 1 + elif cost_function == Cost_Function.PRECOMPUTED_COST: + self.cost_function = PRECOMPUTED_COST(self.env, mpsc_cost_horizon, decay_factor, self.output_dir) else: raise NotImplementedError(f'The MPSC cost function {cost_function} has not been implemented') @@ -95,9 +105,21 @@ def set_dynamics(self): '''Compute the dynamics.''' raise NotImplementedError - @abstractmethod def setup_optimizer(self): '''Setup the certifying MPC problem.''' + if self.use_acados: + self.setup_acados_optimizer() + else: + self.setup_casadi_optimizer() + + @abstractmethod + def setup_casadi_optimizer(self): + '''Setup the certifying MPC problem using CasADi.''' + raise NotImplementedError + + @abstractmethod + def setup_acados_optimizer(self): + '''Setup the certifying MPC problem using ACADOS.''' raise NotImplementedError def before_optimization(self, obs): @@ -125,6 +147,28 @@ def solve_optimization(self, feasible (bool): Whether the safety filtering was feasible or not. ''' + if self.use_acados: + action, feasible = self.solve_acados_optimization(obs, uncertified_action, iteration) + else: + action, feasible = self.solve_casadi_optimization(obs, uncertified_action, iteration) + return action, feasible + + def solve_casadi_optimization(self, + obs, + uncertified_action, + iteration=None, + ): + '''Solve the MPC optimization problem for a given observation and uncertified input. + + Args: + obs (ndarray): Current state/observation. + uncertified_action (ndarray): The uncertified_controller's action. + iteration (int): The current iteration, used for trajectory tracking. + + Returns: + action (ndarray): The certified action. + feasible (bool): Whether the safety filtering was feasible or not. + ''' opti_dict = self.opti_dict opti = opti_dict['opti'] z_var = opti_dict['z_var'] @@ -153,6 +197,8 @@ def solve_optimization(self, # Solve the optimization problem. try: sol = opti.solve() + self.cost_prev = sol.value(opti_dict['cost']) + self.slack_prev = sol.value(opti_dict['slack']) x_val, u_val, next_u_val = sol.value(z_var), sol.value(v_var), sol.value(next_u) self.z_prev = x_val self.v_prev = u_val.reshape((self.model.nu), self.horizon) @@ -168,6 +214,56 @@ def solve_optimization(self, action = None return action, feasible + def solve_acados_optimization(self, + obs, + uncertified_action, + iteration=None, + ): + '''Solve the MPC optimization problem for a given observation and uncertified input. 
+ + Args: + obs (ndarray): Current state/observation. + uncertified_action (ndarray): The uncertified_controller's action. + iteration (int): The current iteration, used for trajectory tracking. + + Returns: + action (ndarray): The certified action. + feasible (bool): Whether the safety filtering was feasible or not. + ''' + + ocp_solver = self.ocp_solver + ocp_solver.cost_set(0, 'yref', np.concatenate((np.zeros((self.model.nx)), np.atleast_1d(np.squeeze(uncertified_action))))) + + if isinstance(self.cost_function, PRECOMPUTED_COST): + uncert_input_traj = self.cost_function.calculate_unsafe_path(obs, uncertified_action, iteration) + + for stage in range(1, self.mpsc_cost_horizon): + ocp_solver.cost_set(stage, 'yref', np.concatenate((np.zeros((self.model.nx)), uncert_input_traj[:, stage]))) + + # Solve the optimization problem. + try: + action = ocp_solver.solve_for_x0(x0_bar=obs) + self.cost_prev = ocp_solver.get_cost() + self.slack_prev = np.zeros((self.horizon, self.model.nx + self.model.nu)) + x_val = np.zeros((self.horizon + 1, self.model.nx)) + u_val = np.zeros((self.horizon, self.model.nu)) + for i in range(self.horizon): + # self.slack_prev[i, :] = ocp_solver.get(i, 'su') + x_val[i, :] = ocp_solver.get(i, 'x') + u_val[i, :] = ocp_solver.get(i, 'u') + x_val[self.horizon, :] = ocp_solver.get(self.horizon, 'x') + self.z_prev = x_val.T + self.v_prev = u_val.T + # Take the first one from solved action sequence. + self.prev_action = action + feasible = True + except Exception as e: + print('Error Return Status:', ocp_solver.status) + print(e) + feasible = False + action = None + return action, feasible + def certify_action(self, current_state, uncertified_action, @@ -254,5 +350,6 @@ def reset_before_run(self, env=None): ''' self.z_prev = None self.v_prev = None + self.slack_prev = 0 self.kinf = self.horizon - 1 self.setup_results_dict() diff --git a/safe_control_gym/safety_filters/mpsc/mpsc.yaml b/safe_control_gym/safety_filters/mpsc/mpsc.yaml index 9d2798d0c..9640712bd 100644 --- a/safe_control_gym/safety_filters/mpsc/mpsc.yaml +++ b/safe_control_gym/safety_filters/mpsc/mpsc.yaml @@ -1,7 +1,7 @@ # LQR controller parameters -r_lin: +r_mpc: - 1. -q_lin: +q_mpc: - 1. 
# MPC Parameters @@ -10,19 +10,7 @@ warmstart: False integration_algo: rk4 use_terminal_set: False -# Prior info -prior_info: - prior_prop: null - randomize_prior_prop: False - prior_prop_rand_info: null - -# Safe set calculation -n_samples: 600 -n_samples_terminal_set: 10 -learn_terminal_set: False - -# Tau parameter for the calcuation of the RPI -tau: 0.95 - # Cost function cost_function: one_step_cost +mpsc_cost_horizon: 5 +decay_factor: 0.85 diff --git a/safe_control_gym/safety_filters/mpsc/mpsc_acados.py b/safe_control_gym/safety_filters/mpsc/mpsc_acados.py new file mode 100644 index 000000000..1846f85ca --- /dev/null +++ b/safe_control_gym/safety_filters/mpsc/mpsc_acados.py @@ -0,0 +1,259 @@ +'''Model Predictive Safety Certification using Acados.''' +import os +import shutil +from datetime import datetime + +import casadi as cs +import numpy as np +import scipy +from acados_template import AcadosModel, AcadosOcp, AcadosOcpSolver + +from safe_control_gym.controllers.mpc.mpc_utils import set_acados_constraint_bound +from safe_control_gym.safety_filters.mpsc.mpsc import MPSC +from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function +from safe_control_gym.utils.utils import timing + + +class MPSC_ACADOS(MPSC): + '''MPSC with full nonlinear model.''' + + def __init__( + self, + env_func, + horizon: int = 5, + q_mpc: list = [1], + r_mpc: list = [1], + integration_algo: str = 'rk4', + warmstart: bool = True, + additional_constraints: list = None, + use_terminal_set: bool = True, + soften_constraints: bool = False, + slack_cost: float = 1, + constraint_tol: float = 1e-6, + seed: int = 0, + use_RTI: bool = False, + cost_function: Cost_Function = Cost_Function.ONE_STEP_COST, + mpsc_cost_horizon: int = 5, + decay_factor: float = 0.85, + **kwargs + ): + '''Creates task and controller. + + Args: + env_func (Callable): function to instantiate task/environment. + horizon (int): mpc planning horizon. + q_mpc (list): diagonals of state cost weight. + r_mpc (list): diagonals of input/action cost weight. + warmstart (bool): if to initialize from previous iteration. + soften_constraints (bool): Formulate the constraints as soft constraints. + constraint_tol (float): Tolerance to add the the constraint as sometimes solvers are not exact. + seed (int): random seed. + use_RTI (bool): Real-time iteration for acados. + ''' + for k, v in locals().items(): + if k != 'self' and k != 'kwargs' and '__' not in k: + self.__dict__.update({k: v}) + + super().__init__( + env_func, + horizon, + q_mpc, + r_mpc, + integration_algo, + warmstart, + additional_constraints, + use_terminal_set, + cost_function, + mpsc_cost_horizon, + decay_factor, + **kwargs) + + # acados settings + self.use_RTI = use_RTI + + # Dynamics model. + self.setup_acados_model() + # Acados optimizer. 
+ self.setup_acados_optimizer() + + @timing + def reset(self): + '''Prepares for training or evaluation.''' + print('Resetting MPC') + super().reset() + if hasattr(self, 'acados_model'): + del self.acados_model + if hasattr(self, 'ocp'): + del self.ocp + if hasattr(self, 'acados_ocp_solver'): + del self.acados_ocp_solver + + # delete the generated c code directory + if os.path.exists('./c_generated_code'): + print('deleting the generated MPC c code directory') + shutil.rmtree('./c_generated_code') + assert not os.path.exists('./c_generated_code'), 'Failed to delete the generated c code directory' + + def setup_acados_model(self) -> AcadosModel: + '''Sets up symbolic model for acados.''' + acados_model = AcadosModel() + acados_model.x = self.model.x_sym + acados_model.u = self.model.u_sym + acados_model.name = self.env.NAME + + # set up rk4 (acados need symbolic expression of dynamics, not function) + k1 = self.model.fc_func(acados_model.x, acados_model.u) + k2 = self.model.fc_func(acados_model.x + self.dt / 2 * k1, acados_model.u) + k3 = self.model.fc_func(acados_model.x + self.dt / 2 * k2, acados_model.u) + k4 = self.model.fc_func(acados_model.x + self.dt * k3, acados_model.u) + f_disc = acados_model.x + self.dt / 6 * (k1 + 2 * k2 + 2 * k3 + k4) + + acados_model.disc_dyn_expr = f_disc + + # store meta information # NOTE: unit is missing + acados_model.x_labels = self.env.STATE_LABELS + acados_model.u_labels = self.env.ACTION_LABELS + acados_model.t_label = 'time' + # get current time stamp in $ymd_HMS format + current_time = datetime.now().strftime('%Y%m%d_%H%M%S') + acados_model.name = self.env.NAME + '_' + current_time + + self.acados_model = acados_model + + def set_dynamics(self): + pass + + def setup_casadi_optimizer(self): + pass + + def setup_acados_optimizer(self): + '''Sets up nonlinear optimization problem.''' + nx, nu = self.model.nx, self.model.nu + ny = nx + nu + ny_e = nx + + # create ocp object to formulate the OCP + ocp = AcadosOcp() + ocp.model = self.acados_model + + # set dimensions + ocp.dims.N = self.horizon # prediction horizon + + # set cost (NOTE: safe-control-gym uses quadratic cost) + ocp.cost.cost_type = 'LINEAR_LS' + ocp.cost.cost_type_e = 'LINEAR_LS' + + Q_mat = np.zeros((nx, nx)) + R_mat = np.eye(nu) + ocp.cost.W_e = np.zeros((nx, nx)) + ocp.cost.W = scipy.linalg.block_diag(Q_mat, R_mat) + + ocp.cost.Vx = np.zeros((ny, nx)) + ocp.cost.Vu = np.zeros((ny, nu)) + ocp.cost.Vu[nx:nx + nu, :] = np.eye(nu) + ocp.cost.Vx_e = np.eye(nx) + + # placeholder y_ref and y_ref_e (will be set in select_action) + ocp.cost.yref = np.zeros((ny, )) + ocp.cost.yref_e = np.zeros((ny_e, )) + + # set up solver options + ocp.solver_options.qp_solver = 'PARTIAL_CONDENSING_HPIPM' + ocp.solver_options.hessian_approx = 'GAUSS_NEWTON' + ocp.solver_options.integrator_type = 'DISCRETE' + ocp.solver_options.nlp_solver_type = 'SQP' if not self.use_RTI else 'SQP_RTI' + ocp.solver_options.nlp_solver_max_iter = 25 if not self.use_RTI else 1 + ocp.solver_options.tf = self.horizon * self.dt # prediction horizon + + ocp.constraints.x0 = self.model.X_EQ + + # Constraints + # general constraint expressions + state_constraint_expr_list = [] + input_constraint_expr_list = [] + for state_constraint in self.state_constraints_sym: + state_constraint_expr_list.append(state_constraint(ocp.model.x)) + for input_constraint in self.input_constraints_sym: + input_constraint_expr_list.append(input_constraint(ocp.model.u)) + + h_expr_list = state_constraint_expr_list + input_constraint_expr_list + h_expr = 
cs.vertcat(*h_expr_list) + h0_expr = cs.vertcat(*h_expr_list) + he_expr = cs.vertcat(*state_constraint_expr_list) # terminal constraints are only state constraints + # pass the constraints to the ocp object + ocp = self.processing_acados_constraints_expression(ocp, h0_expr, h_expr, he_expr) + + # slack costs for nonlinear constraints + if self.soften_constraints: + # slack variables for all constraints + ocp.constraints.Jsh = np.eye(2 * ny) + # slack penalty + ocp.cost.Zu = self.slack_cost * np.ones(2 * ny) + ocp.cost.Zl = self.slack_cost * np.ones(2 * ny) + ocp.cost.zl = self.slack_cost * np.ones(2 * ny) + ocp.cost.zu = self.slack_cost * np.ones(2 * ny) + + solver_json = 'acados_ocp_mpsf.json' + ocp_solver = AcadosOcpSolver(ocp, json_file=solver_json, generate=True, build=True) + + for stage in range(self.mpsc_cost_horizon): + ocp_solver.cost_set(stage, 'W', (self.cost_function.decay_factor**stage) * ocp.cost.W) + + for stage in range(self.mpsc_cost_horizon, self.horizon): + ocp_solver.cost_set(stage, 'W', 0 * ocp.cost.W) + + self.ocp_solver = ocp_solver + self.ocp = ocp + + def processing_acados_constraints_expression(self, ocp: AcadosOcp, h0_expr, h_expr, he_expr) -> AcadosOcp: + '''Preprocess the constraints to be compatible with acados. + Args: + ocp (AcadosOcp): acados ocp object + h0_expr (casadi expression): initial state constraints + h_expr (casadi expression): state and input constraints + he_expr (casadi expression): terminal state constraints + Returns: + ocp (AcadosOcp): acados ocp object with constraints set. + + An alternative way to set the constraints is to use bounded constraints of acados: + # bounded input constraints + idxbu = np.where(np.sum(self.env.constraints.input_constraints[0].constraint_filter, axis=0) != 0)[0] + ocp.constraints.Jbu = np.eye(nu) + ocp.constraints.lbu = self.env.constraints.input_constraints[0].lower_bounds + ocp.constraints.ubu = self.env.constraints.input_constraints[0].upper_bounds + ocp.constraints.idxbu = idxbu # active constraints dimension + ''' + + ub = {'h': set_acados_constraint_bound(h_expr, 'ub', self.constraint_tol), + 'h0': set_acados_constraint_bound(h0_expr, 'ub', self.constraint_tol), + 'he': set_acados_constraint_bound(he_expr, 'ub', self.constraint_tol), } + + lb = {'h': set_acados_constraint_bound(h_expr, 'lb'), + 'h0': set_acados_constraint_bound(h0_expr, 'lb'), + 'he': set_acados_constraint_bound(he_expr, 'lb'), } + + # make sure all the ub and lb are 1D numpy arrays + # (see: https://discourse.acados.org/t/infeasible-qps-when-using-nonlinear-casadi-constraint-expressions/1595/5?u=mxche) + for key in ub.keys(): + ub[key] = ub[key].flatten() if ub[key].ndim != 1 else ub[key] + lb[key] = lb[key].flatten() if lb[key].ndim != 1 else lb[key] + # check ub and lb dimensions + for key in ub.keys(): + assert ub[key].ndim == 1, f'ub[{key}] is not 1D numpy array' + assert lb[key].ndim == 1, f'lb[{key}] is not 1D numpy array' + assert ub['h'].shape == lb['h'].shape, 'h_ub and h_lb have different shapes' + + # pass the constraints to the ocp object + ocp.model.con_h_expr_0, ocp.model.con_h_expr, ocp.model.con_h_expr_e = \ + h0_expr, h_expr, he_expr + ocp.dims.nh_0, ocp.dims.nh, ocp.dims.nh_e = \ + h0_expr.shape[0], h_expr.shape[0], he_expr.shape[0] + # assign constraints upper and lower bounds + ocp.constraints.uh_0 = ub['h0'] + ocp.constraints.lh_0 = lb['h0'] + ocp.constraints.uh = ub['h'] + ocp.constraints.lh = lb['h'] + ocp.constraints.uh_e = ub['he'] + ocp.constraints.lh_e = lb['he'] + + return ocp diff --git 
a/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/abstract_cost.py b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/abstract_cost.py index 3980790c2..cc9db996a 100644 --- a/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/abstract_cost.py +++ b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/abstract_cost.py @@ -10,11 +10,15 @@ class MPSC_COST(ABC): def __init__(self, env: BenchmarkEnv = None, + mpsc_cost_horizon: int = 1, + decay_factor: float = 0.85, ): '''Initialize the MPSC Cost. Args: env (BenchmarkEnv): Environment for the task. + mpsc_cost_horizon (int): How many steps forward to check for constraint violations. + decay_factor (float): How much to discount future costs. ''' self.env = env @@ -22,6 +26,9 @@ def __init__(self, # Setup attributes. self.model = self.env.symbolic if env is not None else None + self.mpsc_cost_horizon = mpsc_cost_horizon + self.decay_factor = decay_factor + @abstractmethod def get_cost(self, opti_dict): '''Returns the cost function for the MPSC optimization in symbolic form. diff --git a/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py new file mode 100644 index 000000000..b6a1acc4e --- /dev/null +++ b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py @@ -0,0 +1,127 @@ +'''Precomputed Cost Function for Smooth MPSC.''' + +import numpy as np + +from safe_control_gym.controllers.pid.pid import PID +from safe_control_gym.envs.env_wrappers.vectorized_env.vec_env import VecEnv +from safe_control_gym.safety_filters.mpsc.mpsc_cost_function.abstract_cost import MPSC_COST + + +class PRECOMPUTED_COST(MPSC_COST): + '''Precomputed future states MPSC Cost Function.''' + + def __init__(self, + env, + mpsc_cost_horizon: int = 5, + decay_factor: float = 0.85, + output_dir: str = '.', + ): + '''Initialize the MPSC Cost. + + Args: + env (BenchmarkEnv): Environment for the task. + mpsc_cost_horizon (int): How many steps forward to check for constraint violations. + decay_factor (float): How much to discount future costs. + output_dir (str): Folder to write outputs. + ''' + + super().__init__(env, mpsc_cost_horizon, decay_factor) + + self.output_dir = output_dir + self.uncertified_controller = None + + def get_cost(self, opti_dict): + '''Returns the cost function for the MPSC optimization in symbolic form. + + Args: + opti_dict (dict): The dictionary of optimization variables. + + Returns: + cost (casadi symbolic expression): The symbolic cost function using casadi. + ''' + + opti = opti_dict['opti'] + next_u = opti_dict['next_u'] + u_L = opti_dict['u_L'] + v_var = opti_dict['v_var'] + + v_L = opti.parameter(self.model.nu, self.mpsc_cost_horizon) + + opti_dict['v_L'] = v_L + + cost = (u_L - next_u).T @ (u_L - next_u) + for h in range(1, self.mpsc_cost_horizon): + cost += (self.decay_factor**h) * (v_L[:, h] - v_var[:, h]).T @ (v_L[:, h] - v_var[:, h]) + + return cost + + def prepare_cost_variables(self, opti_dict, obs, iteration): + '''Prepares all the symbolic variable initial values for the next optimization. + + Args: + opti_dict (dict): The dictionary of optimization variables. + obs (ndarray): Current state/observation. + iteration (int): The current iteration, used for trajectory tracking. 
+ ''' + + opti = opti_dict['opti'] + v_L = opti_dict['v_L'] + u_L = opti_dict['u_L'] + + uncertified_action = opti.value(u_L, opti.initial()) + + expected_inputs = self.calculate_unsafe_path(obs, uncertified_action, iteration) + opti.set_value(v_L, expected_inputs) + + def calculate_unsafe_path(self, obs, uncertified_action, iteration): + '''Precomputes the likely actions the uncertified controller will take. + + Args: + obs (ndarray): Current state/observation. + uncertified_action (ndarray): The uncertified_controller's action. + iteration (int): The current iteration, used for trajectory tracking. + + Returns: + v_L (ndarray): The estimated future actions taken by the uncertified_controller. + ''' + + if self.uncertified_controller is None: + raise Exception('[ERROR] No underlying controller passed to P_MPSC') + + if isinstance(self.uncertified_controller.env, VecEnv): + uncert_env = self.uncertified_controller.env.envs[0] + else: + uncert_env = self.uncertified_controller.env + + v_L = np.zeros((self.model.nu, self.mpsc_cost_horizon)) + + if isinstance(self.uncertified_controller, PID): + self.uncertified_controller.save(f'{self.output_dir}/temp-data/saved_controller_curr.npy') + self.uncertified_controller.load(f'{self.output_dir}/temp-data/saved_controller_prev.npy') + + for h in range(self.mpsc_cost_horizon): + next_step = min(iteration + h, self.env.X_GOAL.shape[0] - 1) + # Concatenate goal info (goal state(s)) for RL + extended_obs = self.env.extend_obs(obs, next_step + 1) + + info = {'current_step': next_step} + + action = self.uncertified_controller.select_action(obs=extended_obs, info=info) + + if uncert_env.NORMALIZED_RL_ACTION_SPACE: + action = uncert_env.denormalize_action(action) + + action = np.clip(action, self.env.physical_action_bounds[0], self.env.physical_action_bounds[1]) + + if h == 0 and np.linalg.norm(uncertified_action - action) >= 0.001: + raise ValueError(f'[ERROR] Mismatch between unsafe controller and MPSC guess. Uncert: {uncertified_action}, Guess: {action}, Diff: {np.linalg.norm(uncertified_action - action)}.') + + v_L[:, h:h + 1] = action.reshape((self.model.nu, 1)) + + obs = np.squeeze(self.model.fd_func(x0=obs, p=action)['xf'].toarray()) + + if isinstance(self.uncertified_controller, PID): + self.uncertified_controller.load(f'{self.output_dir}/temp-data/saved_controller_curr.npy') + self.uncertified_controller.save(f'{self.output_dir}/temp-data/saved_controller_prev.npy') + + return v_L diff --git a/safe_control_gym/safety_filters/mpsc/mpsc_utils.py b/safe_control_gym/safety_filters/mpsc/mpsc_utils.py index 98bfd40be..0771e0f75 100644 --- a/safe_control_gym/safety_filters/mpsc/mpsc_utils.py +++ b/safe_control_gym/safety_filters/mpsc/mpsc_utils.py @@ -16,6 +16,7 @@ class Cost_Function(str, Enum): '''MPSC Cost functions enumeration class.''' ONE_STEP_COST = 'one_step_cost' # Default MPSC cost function. + PRECOMPUTED_COST = 'precomputed_cost' # Smooth cost based on precomputed future states def compute_RPI_set(Acl, @@ -58,8 +59,7 @@ def compute_RPI_set(Acl, except cp.SolverError: msg = '[ERROR] RPI Computation failed. Ensure you have the MOSEK solver. Otherwise, error unknown.' print(msg) - raise Exception(msg) from None - # exit() + raise Exception(msg) from None return P.value @@ -144,3 +144,16 @@ def get_trajectory_on_horizon(env, iteration, horizon): clipped_X_GOAL = env.X_GOAL return clipped_X_GOAL + + +def get_discrete_derivative(signal, ctrl_freq): + '''Calculates the discrete derivative of a signal. 
+ + Args: + signal (np.ndarray): An array of values. + + Returns: + discrete_derivative (np.ndarray): The discrete derivative of the signal. + ''' + discrete_derivative = (signal[1:, :] - signal[:-1, :]) * ctrl_freq + return discrete_derivative
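
Note (not part of the patch): a minimal standalone sketch of how the new get_discrete_derivative helper behaves. The function names and sample values below are illustrative only; it simply takes row-wise finite differences of a (T, nu) signal and scales them by the control frequency, returning a (T-1, nu) array of per-step rates.

import numpy as np

# Standalone copy of the helper added in mpsc_utils.py, for illustration.
def get_discrete_derivative(signal, ctrl_freq):
    '''First-order finite difference of a (T, n) signal, scaled by the control frequency.'''
    return (signal[1:, :] - signal[:-1, :]) * ctrl_freq

# Example: rate of change of a 2-input action sequence sampled at 60 Hz (hypothetical values).
ctrl_freq = 60.0
actions = np.array([[0.10, 0.00],
                    [0.12, 0.01],
                    [0.15, 0.01]])
rates = get_discrete_derivative(actions, ctrl_freq)
print(rates.shape)  # (2, 2): one row fewer than the input signal
print(rates[0])     # [1.2 0.6] -> (0.12 - 0.10) * 60, (0.01 - 0.00) * 60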