diff --git a/experiments/mpsc/config_overrides/quadrotor_3D/ppo_quadrotor_3D.yaml b/experiments/mpsc/config_overrides/quadrotor_3D/ppo_quadrotor_3D.yaml
index f74399e5b..294a1e0bf 100644
--- a/experiments/mpsc/config_overrides/quadrotor_3D/ppo_quadrotor_3D.yaml
+++ b/experiments/mpsc/config_overrides/quadrotor_3D/ppo_quadrotor_3D.yaml
@@ -32,3 +32,5 @@ algo_config:
   penalize_sf_diff: True
   sf_penalty: 75
   use_safe_reset: True
+  decay_factor_curriculum: False
+  preserve_random_state: False
diff --git a/experiments/mpsc/train_all_models.sh b/experiments/mpsc/train_all_models.sh
index ca7fb021f..f699c7cbf 100755
--- a/experiments/mpsc/train_all_models.sh
+++ b/experiments/mpsc/train_all_models.sh
@@ -2,7 +2,12 @@ sbatch train_model.sbatch False 1 1
 
 for MPSC_COST_HORIZON in 2 5 10 20; do
     for DECAY_FACTOR in 0.25 0.5 0.75 1; do
-        sbatch train_model.sbatch True $MPSC_COST_HORIZON $DECAY_FACTOR
-        sbatch train_model.sbatch False $MPSC_COST_HORIZON $DECAY_FACTOR
+        # Ignore precomputed differences
+        sbatch train_model.sbatch False $MPSC_COST_HORIZON $DECAY_FACTOR False
+        sbatch train_model.sbatch True $MPSC_COST_HORIZON $DECAY_FACTOR False
+
+        # Preserve random state
+        sbatch train_model.sbatch False $MPSC_COST_HORIZON $DECAY_FACTOR True
+        sbatch train_model.sbatch True $MPSC_COST_HORIZON $DECAY_FACTOR True
    done
done
diff --git a/experiments/mpsc/train_model.sbatch b/experiments/mpsc/train_model.sbatch
index ca2bd81e4..64839ae6d 100755
--- a/experiments/mpsc/train_model.sbatch
+++ b/experiments/mpsc/train_model.sbatch
@@ -20,7 +20,7 @@ MPSC_COST_HORIZON=$2
 DECAY_FACTOR=$3
 SF_PENALTY=1.0
 
-TAG="curriculum_$1_$2_$3"
+TAG="curriculum_$1_$2_$3_$4"
 echo $TAG $SYS $ALGO $TASK
 
 # Train the unsafe controller/agent.
@@ -45,6 +45,7 @@ python3 train_rl.py \
     algo_config.use_safe_reset=True \
     algo_config.penalize_sf_diff=True \
     algo_config.sf_penalty=$SF_PENALTY \
-    algo_config.decay_factor_curriculum=$1
+    algo_config.decay_factor_curriculum=$1 \
+    algo_config.preserve_random_state=$4
 
 ./mpsc_experiment.sh $TAG $SYS $TASK $ALGO
diff --git a/safe_control_gym/controllers/ppo/ppo.py b/safe_control_gym/controllers/ppo/ppo.py
index bf35c673c..7095f3429 100644
--- a/safe_control_gym/controllers/ppo/ppo.py
+++ b/safe_control_gym/controllers/ppo/ppo.py
@@ -26,7 +26,7 @@
 from safe_control_gym.math_and_models.normalization import (BaseNormalizer, MeanStdNormalizer,
                                                             RewardStdNormalizer)
 from safe_control_gym.utils.logging import ExperimentLogger
-from safe_control_gym.utils.utils import get_random_state, is_wrapped, set_random_state
+from safe_control_gym.utils.utils import is_wrapped
 
 
 class PPO(BaseController):
@@ -120,8 +120,15 @@ def close(self):
 
     def save(self,
              path,
+             save_only_random_seed=False,
              ):
         '''Saves model params and experiment state to checkpoint path.'''
+        if save_only_random_seed is True:
+            exp_state = {
+                'env_random_state': self.env.get_env_random_state()
+            }
+            torch.save(exp_state, path)
+            return
         path_dir = os.path.dirname(path)
         os.makedirs(path_dir, exist_ok=True)
         state_dict = {
@@ -133,7 +140,6 @@ def save(self,
             exp_state = {
                 'total_steps': self.total_steps,
                 'obs': self.obs,
-                'random_state': get_random_state(),
                 'env_random_state': self.env.get_env_random_state()
             }
             state_dict.update(exp_state)
@@ -141,9 +147,13 @@ def save(self,
 
     def load(self,
              path,
+             load_only_random_seed=False,
              ):
         '''Restores model and experiment given checkpoint path.'''
         state = torch.load(path)
+        if load_only_random_seed is True:
+            self.env.set_env_random_state(state['env_random_state'])
+            return
         # Restore policy.
         self.agent.load_state_dict(state['agent'])
         self.obs_normalizer.load_state_dict(state['obs_normalizer'])
@@ -152,7 +162,6 @@ def load(self,
         if self.training:
             self.total_steps = state['total_steps']
             self.obs = state['obs']
-            set_random_state(state['random_state'])
             self.env.set_env_random_state(state['env_random_state'])
             self.logger.load(self.total_steps)
 
@@ -192,7 +201,7 @@ def learn(self,
             if self.log_interval and self.total_steps % self.log_interval == 0:
                 self.log_step(results)
 
-    def select_action(self, obs, info=None):
+    def select_action(self, obs, info=None, training=False):
         '''Determine the action to take at the current timestep.
 
         Args:
@@ -203,9 +212,14 @@ def select_action(self, obs, info=None):
             action (ndarray): The action chosen by the controller.
         '''
 
-        with torch.no_grad():
-            obs = torch.FloatTensor(obs).to(self.device)
-            action = self.agent.ac.act(obs)
+        if not training:
+            with torch.no_grad():
+                obs = torch.FloatTensor(obs).to(self.device)
+                action = self.agent.ac.act(obs)
+        else:
+            with torch.no_grad():
+                obs = torch.FloatTensor(obs).to(self.device)
+                action, _, _ = self.agent.ac.step(obs)
 
         return action
 
@@ -216,6 +230,7 @@ def run(self,
             verbose=False,
             ):
         '''Runs evaluation with current policy.'''
+        self.curr_training = False
         self.agent.eval()
         self.obs_normalizer.set_read_only()
         if env is None:
@@ -283,6 +298,7 @@ def run(self,
 
     def train_step(self):
         '''Performs a training/fine-tuning step.'''
+        self.curr_training = True
         self.agent.train()
         self.obs_normalizer.unset_read_only()
         rollouts = PPOBuffer(self.env.observation_space, self.env.action_space, self.rollout_steps, self.rollout_batch_size)
@@ -290,6 +306,8 @@ def train_step(self):
         true_obs = self.true_obs
         info = self.info
         start = time.time()
+        if self.safety_filter is not None and self.preserve_random_state is True:
+            self.save('./temp-data/saved_controller_prev.npy', save_only_random_seed=True)
         for _ in range(self.rollout_steps):
             with torch.no_grad():
                 action, v, logp = self.agent.ac.step(torch.FloatTensor(obs).to(self.device))
diff --git a/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py
index ba04e912f..6a7c2dce2 100644
--- a/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py
+++ b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py
@@ -3,6 +3,7 @@
 import numpy as np
 
 from safe_control_gym.controllers.pid.pid import PID
+from safe_control_gym.controllers.ppo.ppo import PPO
 from safe_control_gym.envs.benchmark_env import Environment
 from safe_control_gym.envs.env_wrappers.vectorized_env.vec_env import VecEnv
 from safe_control_gym.safety_filters.mpsc.mpsc_cost_function.abstract_cost import MPSC_COST
@@ -99,15 +100,19 @@ def calculate_unsafe_path(self, obs, uncertified_action, iteration):
         if isinstance(self.uncertified_controller, PID):
             self.uncertified_controller.save(f'{self.output_dir}/temp-data/saved_controller_curr.npy')
             self.uncertified_controller.load(f'{self.output_dir}/temp-data/saved_controller_prev.npy')
+        elif isinstance(self.uncertified_controller, PPO) and self.uncertified_controller.curr_training is True and self.uncertified_controller.preserve_random_state:
+            self.uncertified_controller.save(f'{self.output_dir}/temp-data/saved_controller_curr.npy', save_only_random_seed=True)
+            self.uncertified_controller.load(f'{self.output_dir}/temp-data/saved_controller_prev.npy', load_only_random_seed=True)
 
         for h in range(self.mpsc_cost_horizon):
             next_step = min(iteration + h, self.env.X_GOAL.shape[0] - 1)
 
             # Concatenate goal info (goal state(s)) for RL
             extended_obs = self.env.extend_obs(obs, next_step + 1)
 
-            info = {'current_step': next_step}
-
-            action = self.uncertified_controller.select_action(obs=extended_obs, info=info)
+            if isinstance(self.uncertified_controller, PPO):
+                action = self.uncertified_controller.select_action(obs=extended_obs, info={'current_step': next_step}, training=self.uncertified_controller.curr_training)
+            else:
+                action = self.uncertified_controller.select_action(obs=extended_obs, info={'current_step': next_step})
 
             if uncert_env.NORMALIZED_RL_ACTION_SPACE:
                 if self.env.NAME == Environment.CARTPOLE:
@@ -117,8 +122,11 @@ def calculate_unsafe_path(self, obs, uncertified_action, iteration):
 
             action = np.clip(action, self.env.physical_action_bounds[0], self.env.physical_action_bounds[1])
 
-            # if h == 0 and np.linalg.norm(uncertified_action - action) >= 0.001:
-            #     raise ValueError(f'[ERROR] Mismatch between unsafe controller and MPSC guess. Uncert: {uncertified_action}, Guess: {action}, Diff: {np.linalg.norm(uncertified_action - action)}.')
+            if h == 0 \
+                    and np.linalg.norm(uncertified_action - action) >= 0.001 \
+                    and np.linalg.norm(uncertified_action - uncert_env.hover_thrust * np.ones(uncertified_action.shape)) >= 0.001\
+                    and self.uncertified_controller.preserve_random_state is True:
+                raise ValueError(f'[ERROR] Mismatch between unsafe controller and MPSC guess. Uncert: {uncertified_action}, Guess: {action}, Diff: {np.linalg.norm(uncertified_action - action)}.')
 
             v_L[:, h:h + 1] = action.reshape((self.model.nu, 1))
 
@@ -127,5 +135,8 @@ def calculate_unsafe_path(self, obs, uncertified_action, iteration):
         if isinstance(self.uncertified_controller, PID):
             self.uncertified_controller.load(f'{self.output_dir}/temp-data/saved_controller_curr.npy')
             self.uncertified_controller.save(f'{self.output_dir}/temp-data/saved_controller_prev.npy')
+        elif isinstance(self.uncertified_controller, PPO) and self.uncertified_controller.curr_training is True and self.uncertified_controller.preserve_random_state is True:
+            self.uncertified_controller.load(f'{self.output_dir}/temp-data/saved_controller_curr.npy', load_only_random_seed=True)
+            self.uncertified_controller.save(f'{self.output_dir}/temp-data/saved_controller_prev.npy', save_only_random_seed=True)
 
         return v_L