diff --git a/test/unit/with_pytorch/test_discrete_action_space.py b/test/unit/with_pytorch/test_discrete_action_space.py
index 15a0e937..4f71a516 100644
--- a/test/unit/with_pytorch/test_discrete_action_space.py
+++ b/test/unit/with_pytorch/test_discrete_action_space.py
@@ -10,6 +10,7 @@
 import unittest
 
 import torch
+import torch.testing as tt
 
 from pearl.utils.instantiations.spaces.discrete_action import DiscreteActionSpace
 
@@ -19,4 +20,4 @@ def test_iter(self) -> None:
         actions = [torch.randn(4) for _ in range(5)]
         action_space = DiscreteActionSpace(actions=actions)
         for i, action in enumerate(action_space):
-            self.assertTrue(torch.equal(actions[i], action))
+            tt.assert_close(actions[i], action, rtol=0.0, atol=0.0)
diff --git a/test/unit/with_pytorch/test_disjoint_bandits.py b/test/unit/with_pytorch/test_disjoint_bandits.py
index ed77cb8c..25c381c3 100644
--- a/test/unit/with_pytorch/test_disjoint_bandits.py
+++ b/test/unit/with_pytorch/test_disjoint_bandits.py
@@ -11,6 +11,7 @@
 import unittest
 
 import torch
+import torch.testing as tt
 from parameterized import parameterized_class
 from pearl.policy_learners.contextual_bandits.disjoint_bandit import (
     DisjointBanditContainer,
@@ -73,14 +74,13 @@ def test_learn_batch(self) -> None:
         for i, action in enumerate(self.batch.action):
             action = action.item()
             # check if linear regression works
-            self.assertTrue(
-                torch.allclose(
-                    self.policy_learner._linear_regressions[action](
-                        self.batch.state[i : i + 1]
-                    ),
-                    self.batch.reward[i : i + 1],
-                    atol=1e-1,
-                )
+            tt.assert_close(
+                self.policy_learner._linear_regressions[action](
+                    self.batch.state[i : i + 1]
+                ),
+                self.batch.reward[i : i + 1],
+                atol=1e-1,
+                rtol=0.0,
             )
 
     def test_ucb_act(self) -> None:
@@ -245,14 +245,11 @@ def test_learn_batch(self) -> None:
         for i, action in enumerate(self.batch.action):
             action = action.item()
             # check if each arm model works
-            self.assertTrue(
-                torch.allclose(
-                    policy_learner._arm_bandits[action].model(
-                        self.batch.state[i : i + 1]
-                    ),
-                    self.batch.reward[i : i + 1],
-                    atol=1e-1,
-                )
+            tt.assert_close(
+                policy_learner._arm_bandits[action].model(self.batch.state[i : i + 1]),
+                self.batch.reward[i : i + 1],
+                atol=1e-1,
+                rtol=0.0,
             )
 
     def test_ucb_act(self) -> None:
@@ -404,7 +401,7 @@ def test_get_scores(self) -> None:
             sigmas = model.calculate_sigma(features)
             expected_scores.append(mus + alpha * sigmas)
         expected_scores = torch.cat(expected_scores, dim=1)
-        self.assertTrue(torch.allclose(scores, expected_scores, atol=1e-1))
+        tt.assert_close(scores, expected_scores, atol=1e-1, rtol=0.0)
 
     def test_learn_batch_arm_subset(self) -> None:
         # test that learn_batch still works when the batch has a subset of arms
diff --git a/test/unit/with_pytorch/test_dynamic_action_space.py b/test/unit/with_pytorch/test_dynamic_action_space.py
index c2c02360..7a42cca7 100644
--- a/test/unit/with_pytorch/test_dynamic_action_space.py
+++ b/test/unit/with_pytorch/test_dynamic_action_space.py
@@ -10,6 +10,7 @@
 import unittest
 
 import torch
+import torch.testing as tt
 from pearl.action_representation_modules.one_hot_action_representation_module import (
     OneHotActionTensorRepresentationModule,
 )
@@ -67,35 +68,33 @@ def test_basic(self) -> None:
         current_available_actions = batch.curr_available_actions
         current_available_actions_mask = batch.curr_unavailable_actions_mask
         self.assertIsNotNone(current_available_actions)
-        self.assertTrue(
-            torch.equal(
-                current_available_actions,
-                torch.tensor([[[0.0], [2.0], [4.0], [0.0], [0.0]]]),
-            )
+        tt.assert_close(
+            current_available_actions,
+            torch.tensor([[[0.0], [2.0], [4.0], [0.0], [0.0]]]),
+            rtol=0.0,
+            atol=0.0,
         )
         self.assertIsNotNone(current_available_actions_mask)
-        self.assertTrue(
-            torch.equal(
-                current_available_actions_mask,
-                torch.tensor([[False, False, False, True, True]]),
-            )
+        tt.assert_close(
+            current_available_actions_mask,
+            torch.tensor([[False, False, False, True, True]]),
+            rtol=0.0,
+            atol=0.0,
         )
 
         next_available_actions = batch.next_available_actions
         next_unavailable_actions_mask = batch.next_unavailable_actions_mask
         self.assertIsNotNone(next_available_actions)
-        self.assertTrue(
-            torch.equal(
-                next_available_actions,
-                torch.tensor([[[0.0], [3.0], [0.0], [0.0], [0.0]]]),
-            )
+        tt.assert_close(
+            next_available_actions,
+            torch.tensor([[[0.0], [3.0], [0.0], [0.0], [0.0]]]),
+            rtol=0.0,
+            atol=0.0,
         )
         self.assertIsNotNone(next_unavailable_actions_mask)
-        self.assertTrue(
-            torch.equal(
-                next_unavailable_actions_mask,
-                torch.tensor([[False, False, True, True, True]]),
-            )
+        tt.assert_close(
+            next_unavailable_actions_mask,
+            torch.tensor([[False, False, True, True, True]]),
         )
 
         policy_learner = DeepQLearning(
@@ -109,53 +108,53 @@ def test_basic(self) -> None:
         current_available_actions = batch.curr_available_actions
         current_unavailable_actions_mask = batch.curr_unavailable_actions_mask
         self.assertIsNotNone(current_available_actions)
-        self.assertTrue(
-            torch.equal(
-                current_available_actions,
-                torch.tensor(
+        tt.assert_close(
+            current_available_actions,
+            torch.tensor(
+                [
                     [
-                        [
-                            [1, 0, 0, 0, 0],
-                            [0, 0, 1, 0, 0],
-                            [0, 0, 0, 0, 1],
-                            [1, 0, 0, 0, 0],
-                            [1, 0, 0, 0, 0],
-                        ]
+                        [1, 0, 0, 0, 0],
+                        [0, 0, 1, 0, 0],
+                        [0, 0, 0, 0, 1],
+                        [1, 0, 0, 0, 0],
+                        [1, 0, 0, 0, 0],
                     ]
-                ),
-            )
+                ]
+            ),
+            rtol=0.0,
+            atol=0.0,
         )
         self.assertIsNotNone(current_unavailable_actions_mask)
-        self.assertTrue(
-            torch.equal(
-                current_unavailable_actions_mask,
-                torch.tensor([[False, False, False, True, True]]),
-            )
+        tt.assert_close(
+            current_unavailable_actions_mask,
+            torch.tensor([[False, False, False, True, True]]),
+            rtol=0.0,
+            atol=0.0,
        )
 
         next_available_actions = batch.next_available_actions
         next_unavailable_actions_mask = batch.next_unavailable_actions_mask
         self.assertIsNotNone(next_available_actions)
-        self.assertTrue(
-            torch.equal(
-                next_available_actions,
-                torch.tensor(
+        tt.assert_close(
+            next_available_actions,
+            torch.tensor(
+                [
                     [
-                        [
-                            [1, 0, 0, 0, 0],
-                            [0, 0, 0, 1, 0],
-                            [1, 0, 0, 0, 0],
-                            [1, 0, 0, 0, 0],
-                            [1, 0, 0, 0, 0],
-                        ]
+                        [1, 0, 0, 0, 0],
+                        [0, 0, 0, 1, 0],
+                        [1, 0, 0, 0, 0],
+                        [1, 0, 0, 0, 0],
+                        [1, 0, 0, 0, 0],
                     ]
-                ),
-            )
+                ]
+            ),
+            rtol=0.0,
+            atol=0.0,
         )
         self.assertIsNotNone(next_unavailable_actions_mask)
-        self.assertTrue(
-            torch.equal(
-                next_unavailable_actions_mask,
-                torch.tensor([[False, False, True, True, True]]),
-            )
+        tt.assert_close(
+            next_unavailable_actions_mask,
+            torch.tensor([[False, False, True, True, True]]),
+            rtol=0.0,
+            atol=0.0,
         )
diff --git a/test/unit/with_pytorch/test_ensembles.py b/test/unit/with_pytorch/test_ensembles.py
index a1b2dcd6..c4e64f74 100644
--- a/test/unit/with_pytorch/test_ensembles.py
+++ b/test/unit/with_pytorch/test_ensembles.py
@@ -9,8 +9,9 @@
 
 import unittest
 
-import numpy.testing as npt
 import torch
+
+import torch.testing as tt
 from pearl.neural_networks.common.epistemic_neural_networks import Ensemble
 from pearl.neural_networks.common.utils import ensemble_forward
 from torch import optim
@@ -49,10 +50,11 @@ def test_ensemble_values(self) -> None:
         for_loop_values = ensemble_forward(self.network.models, x, use_for_loop=True)
         vectorized_values = ensemble_forward(self.network.models, x, use_for_loop=False)
         self.assertEqual(for_loop_values.shape, vectorized_values.shape)
-        npt.assert_allclose(
-            for_loop_values.detach().numpy(),
-            vectorized_values.detach().numpy(),
+        tt.assert_close(
+            for_loop_values,
+            vectorized_values,
             atol=1e-5,
+            rtol=0.0,
         )
 
     def test_ensemble_optimization(self) -> None:
diff --git a/test/unit/with_pytorch/test_fifo_buffer.py b/test/unit/with_pytorch/test_fifo_buffer.py
index de221a68..046a0f02 100644
--- a/test/unit/with_pytorch/test_fifo_buffer.py
+++ b/test/unit/with_pytorch/test_fifo_buffer.py
@@ -10,6 +10,7 @@
 import unittest
 
 import torch
+import torch.testing as tt
 
 from pearl.replay_buffers.sequential_decision_making.fifo_on_policy_replay_buffer import (
     FIFOOnPolicyReplayBuffer,
@@ -65,32 +66,34 @@ def test_on_poliy_buffer_sarsa_match(self) -> None:
         )
         # expect S0 A0 R0 S1 A1 returned from sample
         batch = replay_buffer.sample(1)
-        self.assertTrue(
-            torch.equal(
-                batch.state,
-                self.states[0].view(1, -1),
-            )
+        tt.assert_close(
+            batch.state,
+            self.states[0].view(1, -1),
+            rtol=0.0,
+            atol=0.0,
         )
-        self.assertTrue(
-            torch.equal(
-                batch.action,
-                torch.tensor([self.actions[0]]),
-            )
+        tt.assert_close(
+            batch.action,
+            torch.tensor([self.actions[0]]),
+            rtol=0.0,
+            atol=0.0,
+        )
+        tt.assert_close(
+            batch.reward, torch.tensor([self.rewards[0]]), rtol=0.0, atol=0.0
         )
-        self.assertTrue(torch.equal(batch.reward, torch.tensor([self.rewards[0]])))
         assert (batch_next_state := batch.next_state) is not None
-        self.assertTrue(
-            torch.equal(
-                batch_next_state,
-                self.next_states[0].view(1, -1),
-            )
+        tt.assert_close(
+            batch_next_state,
+            self.next_states[0].view(1, -1),
+            rtol=0.0,
+            atol=0.0,
         )
         assert (batch_next_action := batch.next_action) is not None
-        self.assertTrue(
-            torch.equal(
-                batch_next_action,
-                torch.tensor([self.actions[1]]),
-            )
+        tt.assert_close(
+            batch_next_action,
+            torch.tensor([self.actions[1]]),
+            rtol=0.0,
+            atol=0.0,
         )
 
     def test_on_poliy_buffer_ternimal_push(self) -> None:
diff --git a/test/unit/with_pytorch/test_hindsight_experience_replay_buffer.py b/test/unit/with_pytorch/test_hindsight_experience_replay_buffer.py
index 502275e0..91ddec4b 100644
--- a/test/unit/with_pytorch/test_hindsight_experience_replay_buffer.py
+++ b/test/unit/with_pytorch/test_hindsight_experience_replay_buffer.py
@@ -11,6 +11,7 @@
 from typing import Dict
 
 import torch
+import torch.testing as tt
 
 from pearl.replay_buffers.sequential_decision_making.hindsight_experience_replay_buffer import (
     HindsightExperienceReplayBuffer,
@@ -89,6 +90,6 @@ def reward_fn(state: torch.Tensor, action: torch.Tensor) -> int:
         assert (batch_state := batch.state) is not None
         assert (batch_next_state := batch.next_state) is not None
         for i in range(2 * len(states) - 2):
-            self.assertTrue(
-                torch.all(torch.eq(batch_state[i][-2:], batch_next_state[i][-2:]))
+            tt.assert_close(
+                batch_state[i][-2:], batch_next_state[i][-2:], rtol=0.0, atol=0.0
             )
diff --git a/test/unit/with_pytorch/test_linear_bandits.py b/test/unit/with_pytorch/test_linear_bandits.py
index b02af327..e0655239 100644
--- a/test/unit/with_pytorch/test_linear_bandits.py
+++ b/test/unit/with_pytorch/test_linear_bandits.py
@@ -11,6 +11,7 @@
 import unittest
 
 import torch
+import torch.testing as tt
 from pearl.neural_networks.contextual_bandit.linear_regression import LinearRegression
 from pearl.policy_learners.contextual_bandits.linear_bandit import LinearBandit
 from pearl.policy_learners.exploration_modules.contextual_bandits.thompson_sampling_exploration import (  # noqa: E501
@@ -52,24 +53,20 @@ def setUp(self) -> None:
     def test_learn(self) -> None:
         batch = self.batch
         # a single input
-        self.assertTrue(
-            torch.allclose(
-                self.policy_learner.model(
-                    torch.cat([batch.state[0], batch.action[0]]).unsqueeze(0),
-                ),
-                batch.reward[0:1],
-                atol=1e-4,
-            )
+        tt.assert_close(
+            self.policy_learner.model(
+                torch.cat([batch.state[0], batch.action[0]]).unsqueeze(0),
+            ),
+            batch.reward[0:1],
+            atol=1e-3,
+            rtol=0.0,
         )
         # a batch input
-        self.assertTrue(
-            torch.allclose(
-                self.policy_learner.model(
-                    torch.cat([batch.state, batch.action], dim=1)
-                ),
-                batch.reward,
-                atol=1e-4,
-            )
+        tt.assert_close(
+            self.policy_learner.model(torch.cat([batch.state, batch.action], dim=1)),
+            batch.reward,
+            atol=1e-3,
+            rtol=0.0,
         )
 
     def test_linear_ucb_scores(self) -> None:
@@ -166,14 +163,10 @@ def test_linear_ucb_sigma(self) -> None:
         )
 
         # the 2nd arm's sigma is sqrt(10) times 1st arm's sigma
-        sigma_ratio = (sigma[-1] / sigma[0]).clone().detach()
-        self.assertTrue(
-            torch.allclose(
-                sigma_ratio,
-                torch.tensor(10.0**0.5),  # the 1st arm occured 10 times than 2nd arm
-                rtol=0.01,
-            )
-        )
+        sigma_ratio = (sigma[-1] / sigma[0]).detach().item()
+        self.assertAlmostEqual(
+            sigma_ratio, 10.0**0.5, delta=0.01
+        )  # the 1st arm occured 10 times than 2nd arm
 
     def test_linear_thompson_sampling_act(self) -> None:
         """
diff --git a/test/unit/with_pytorch/test_linear_regression.py b/test/unit/with_pytorch/test_linear_regression.py
index 34c3f3e1..4ae7a640 100644
--- a/test/unit/with_pytorch/test_linear_regression.py
+++ b/test/unit/with_pytorch/test_linear_regression.py
@@ -10,6 +10,7 @@
 import unittest
 
 import torch
+import torch.testing as tt
 
 from pearl.neural_networks.contextual_bandit.linear_regression import LinearRegression
 
@@ -20,19 +21,19 @@ def test_append_ones(self) -> None:
         x = torch.randn(10)
         expected_output = torch.cat([torch.ones((1,)), x], dim=0)
         output = LinearRegression.append_ones(x)
-        self.assertTrue(torch.allclose(expected_output, output))
+        tt.assert_close(expected_output, output)
 
         # 2D input
         x = torch.randn(10, 5)
         expected_output = torch.cat([torch.ones((10, 1)), x], dim=1)
         output = LinearRegression.append_ones(x)
-        self.assertTrue(torch.allclose(expected_output, output))
+        tt.assert_close(expected_output, output)
 
         # 3D input
         x = torch.randn(10, 5, 6)
         expected_output = torch.cat([torch.ones((10, 5, 1)), x], dim=2)
         output = LinearRegression.append_ones(x)
-        self.assertTrue(torch.allclose(expected_output, output))
+        tt.assert_close(expected_output, output)
 
         # make sure it's traceable
         _ = torch.fx.symbolic_trace(LinearRegression.append_ones)
diff --git a/test/unit/with_pytorch/test_neural_linear_bandits.py b/test/unit/with_pytorch/test_neural_linear_bandits.py
index d7e87516..412aeded 100644
--- a/test/unit/with_pytorch/test_neural_linear_bandits.py
+++ b/test/unit/with_pytorch/test_neural_linear_bandits.py
@@ -10,6 +10,7 @@
 import unittest
 
 import torch
+import torch.testing as tt
 from pearl.neural_networks.common.residual_wrapper import ResidualWrapper
 from pearl.policy_learners.contextual_bandits.neural_bandit import LOSS_TYPES
 from pearl.policy_learners.contextual_bandits.neural_linear_bandit import (
@@ -71,25 +72,25 @@ def test_state_dict(self):
         copy_policy_learner.load_state_dict(policy_learner.state_dict())
 
         # assert and check if they are the same
-        self.assertTrue(
-            torch.equal(
-                copy_policy_learner.model._linear_regression_layer._A,
-                policy_learner.model._linear_regression_layer._A,
-            )
+        tt.assert_close(
+            copy_policy_learner.model._linear_regression_layer._A,
+            policy_learner.model._linear_regression_layer._A,
+            rtol=0.0,
+            atol=0.0,
         )
 
-        self.assertTrue(
-            torch.equal(
-                copy_policy_learner.model._linear_regression_layer._b,
-                policy_learner.model._linear_regression_layer._b,
-            )
+        tt.assert_close(
+            copy_policy_learner.model._linear_regression_layer._b,
+            policy_learner.model._linear_regression_layer._b,
+            rtol=0.0,
+            atol=0.0,
         )
 
         for p1, p2 in zip(
             copy_policy_learner.model._nn_layers.parameters(),
             policy_learner.model._nn_layers.parameters(),
         ):
-            self.assertTrue(torch.equal(p1.to(p2.device), p2))
+            tt.assert_close(p1.to(p2.device), p2, rtol=0.0, atol=0.0)
 
     # currently test support mse, mae, cross_entropy
     # separate loss_types into inddividual test cases to make it easier to debug.
diff --git a/test/unit/with_pytorch/test_on_policy_replay_buffer.py b/test/unit/with_pytorch/test_on_policy_replay_buffer.py
index 22f4e369..cd4e44fe 100644
--- a/test/unit/with_pytorch/test_on_policy_replay_buffer.py
+++ b/test/unit/with_pytorch/test_on_policy_replay_buffer.py
@@ -10,6 +10,7 @@
 import unittest
 
 import torch
+import torch.testing as tt
 
 from pearl.replay_buffers.sequential_decision_making.on_policy_replay_buffer import (
     OnPolicyReplayBuffer,
@@ -55,15 +56,16 @@ def test_push_complete_trajectory(self) -> None:
 
         # validate terminal state indicators - 1 only for the last element
         terminated = batch.terminated[order]
-        self.assertTrue(
-            torch.equal(
-                terminated, torch.eye(self.trajectory_len)[self.trajectory_len - 1]
-            )
+        tt.assert_close(
+            terminated,
+            torch.eye(self.trajectory_len)[self.trajectory_len - 1].bool(),
+            rtol=0.0,
+            atol=0.0,
         )
 
         # validate actions
         actions = batch.action[order]
-        self.assertTrue(torch.equal(actions, torch.arange(self.action_size)))
+        tt.assert_close(actions, torch.arange(self.action_size), rtol=0.0, atol=0.0)
 
     def test_push_2_trajectories(self) -> None:
         replay_buffer = OnPolicyReplayBuffer(self.capacity)
@@ -108,29 +110,30 @@ def test_push_2_trajectories(self) -> None:
 
         # validate terminal state indicators - 1 only for the last element
         terminated = batch.terminated[order]
-        self.assertTrue(
-            torch.equal(
-                terminated[0 : self.trajectory_len],
-                torch.eye(self.trajectory_len)[self.trajectory_len - 1],
-            )
+        tt.assert_close(
+            terminated[0 : self.trajectory_len],
+            torch.eye(self.trajectory_len)[self.trajectory_len - 1].bool(),
+            rtol=0.0,
+            atol=0.0,
        )
-        self.assertTrue(
-            torch.equal(
-                terminated[self.trajectory_len :],
-                torch.eye(trajectory_len_2)[trajectory_len_2 - 1],
-            )
+        tt.assert_close(
+            terminated[self.trajectory_len :],
+            torch.eye(trajectory_len_2)[trajectory_len_2 - 1].bool(),
+            rtol=0.0,
+            atol=0.0,
         )
 
         # validate actions
         actions = batch.action[order]
-        self.assertTrue(
-            torch.equal(
-                actions[0 : self.trajectory_len], torch.arange(self.action_size)
-            )
+        tt.assert_close(
+            actions[0 : self.trajectory_len],
+            torch.arange(self.action_size),
+            rtol=0.0,
+            atol=0.0,
         )
-        self.assertTrue(
-            torch.equal(
-                actions[self.trajectory_len :],
-                torch.arange(self.action_size)[0:trajectory_len_2],
-            )
+        tt.assert_close(
+            actions[self.trajectory_len :],
+            torch.arange(self.action_size)[0:trajectory_len_2],
+            rtol=0.0,
+            atol=0.0,
         )