diff --git a/test/unit/with_pytorch/test_discrete_action_space.py b/test/unit/with_pytorch/test_discrete_action_space.py
index 15a0e937..4f71a516 100644
--- a/test/unit/with_pytorch/test_discrete_action_space.py
+++ b/test/unit/with_pytorch/test_discrete_action_space.py
@@ -10,6 +10,7 @@
 import unittest
 
 import torch
+import torch.testing as tt
 
 from pearl.utils.instantiations.spaces.discrete_action import DiscreteActionSpace
 
@@ -19,4 +20,4 @@ def test_iter(self) -> None:
         actions = [torch.randn(4) for _ in range(5)]
         action_space = DiscreteActionSpace(actions=actions)
         for i, action in enumerate(action_space):
-            self.assertTrue(torch.equal(actions[i], action))
+            tt.assert_close(actions[i], action, rtol=0.0, atol=0.0)
diff --git a/test/unit/with_pytorch/test_disjoint_bandits.py b/test/unit/with_pytorch/test_disjoint_bandits.py
index ed77cb8c..25c381c3 100644
--- a/test/unit/with_pytorch/test_disjoint_bandits.py
+++ b/test/unit/with_pytorch/test_disjoint_bandits.py
@@ -11,6 +11,7 @@
 import unittest
 
 import torch
+import torch.testing as tt
 from parameterized import parameterized_class
 from pearl.policy_learners.contextual_bandits.disjoint_bandit import (
     DisjointBanditContainer,
@@ -73,14 +74,13 @@ def test_learn_batch(self) -> None:
         for i, action in enumerate(self.batch.action):
             action = action.item()
             # check if linear regression works
-            self.assertTrue(
-                torch.allclose(
-                    self.policy_learner._linear_regressions[action](
-                        self.batch.state[i : i + 1]
-                    ),
-                    self.batch.reward[i : i + 1],
-                    atol=1e-1,
-                )
+            tt.assert_close(
+                self.policy_learner._linear_regressions[action](
+                    self.batch.state[i : i + 1]
+                ),
+                self.batch.reward[i : i + 1],
+                atol=1e-1,
+                rtol=0.0,
             )
 
     def test_ucb_act(self) -> None:
@@ -245,14 +245,11 @@ def test_learn_batch(self) -> None:
         for i, action in enumerate(self.batch.action):
             action = action.item()
             # check if each arm model works
-            self.assertTrue(
-                torch.allclose(
-                    policy_learner._arm_bandits[action].model(
-                        self.batch.state[i : i + 1]
-                    ),
-                    self.batch.reward[i : i + 1],
-                    atol=1e-1,
-                )
+            tt.assert_close(
+                policy_learner._arm_bandits[action].model(self.batch.state[i : i + 1]),
+                self.batch.reward[i : i + 1],
+                atol=1e-1,
+                rtol=0.0,
             )
 
     def test_ucb_act(self) -> None:
@@ -404,7 +401,7 @@ def test_get_scores(self) -> None:
             sigmas = model.calculate_sigma(features)
             expected_scores.append(mus + alpha * sigmas)
         expected_scores = torch.cat(expected_scores, dim=1)
-        self.assertTrue(torch.allclose(scores, expected_scores, atol=1e-1))
+        tt.assert_close(scores, expected_scores, atol=1e-1, rtol=0.0)
 
     def test_learn_batch_arm_subset(self) -> None:
         # test that learn_batch still works when the batch has a subset of arms
diff --git a/test/unit/with_pytorch/test_dynamic_action_space.py b/test/unit/with_pytorch/test_dynamic_action_space.py
index c2c02360..7a42cca7 100644
--- a/test/unit/with_pytorch/test_dynamic_action_space.py
+++ b/test/unit/with_pytorch/test_dynamic_action_space.py
@@ -10,6 +10,7 @@
 import unittest
 
 import torch
+import torch.testing as tt
 from pearl.action_representation_modules.one_hot_action_representation_module import (
     OneHotActionTensorRepresentationModule,
 )
@@ -67,35 +68,33 @@ def test_basic(self) -> None:
         current_available_actions = batch.curr_available_actions
         current_available_actions_mask = batch.curr_unavailable_actions_mask
         self.assertIsNotNone(current_available_actions)
-        self.assertTrue(
-            torch.equal(
-                current_available_actions,
-                torch.tensor([[[0.0], [2.0], [4.0], [0.0], [0.0]]]),
-            )
+        tt.assert_close(
+            current_available_actions,
+            torch.tensor([[[0.0], [2.0], [4.0], [0.0], [0.0]]]),
+            rtol=0.0,
+            atol=0.0,
         )
         self.assertIsNotNone(current_available_actions_mask)
-        self.assertTrue(
-            torch.equal(
-                current_available_actions_mask,
-                torch.tensor([[False, False, False, True, True]]),
-            )
+        tt.assert_close(
+            current_available_actions_mask,
+            torch.tensor([[False, False, False, True, True]]),
+            rtol=0.0,
+            atol=0.0,
         )
 
         next_available_actions = batch.next_available_actions
         next_unavailable_actions_mask = batch.next_unavailable_actions_mask
         self.assertIsNotNone(next_available_actions)
-        self.assertTrue(
-            torch.equal(
-                next_available_actions,
-                torch.tensor([[[0.0], [3.0], [0.0], [0.0], [0.0]]]),
-            )
+        tt.assert_close(
+            next_available_actions,
+            torch.tensor([[[0.0], [3.0], [0.0], [0.0], [0.0]]]),
+            rtol=0.0,
+            atol=0.0,
         )
         self.assertIsNotNone(next_unavailable_actions_mask)
-        self.assertTrue(
-            torch.equal(
-                next_unavailable_actions_mask,
-                torch.tensor([[False, False, True, True, True]]),
-            )
+        tt.assert_close(
+            next_unavailable_actions_mask,
+            torch.tensor([[False, False, True, True, True]]),
         )
 
         policy_learner = DeepQLearning(
@@ -109,53 +108,53 @@ def test_basic(self) -> None:
         current_available_actions = batch.curr_available_actions
         current_unavailable_actions_mask = batch.curr_unavailable_actions_mask
         self.assertIsNotNone(current_available_actions)
-        self.assertTrue(
-            torch.equal(
-                current_available_actions,
-                torch.tensor(
+        tt.assert_close(
+            current_available_actions,
+            torch.tensor(
+                [
                     [
-                        [
-                            [1, 0, 0, 0, 0],
-                            [0, 0, 1, 0, 0],
-                            [0, 0, 0, 0, 1],
-                            [1, 0, 0, 0, 0],
-                            [1, 0, 0, 0, 0],
-                        ]
+                        [1, 0, 0, 0, 0],
+                        [0, 0, 1, 0, 0],
+                        [0, 0, 0, 0, 1],
+                        [1, 0, 0, 0, 0],
+                        [1, 0, 0, 0, 0],
                     ]
-                ),
-            )
+                ]
+            ),
+            rtol=0.0,
+            atol=0.0,
         )
         self.assertIsNotNone(current_unavailable_actions_mask)
-        self.assertTrue(
-            torch.equal(
-                current_unavailable_actions_mask,
-                torch.tensor([[False, False, False, True, True]]),
-            )
+        tt.assert_close(
+            current_unavailable_actions_mask,
+            torch.tensor([[False, False, False, True, True]]),
+            rtol=0.0,
+            atol=0.0,
        )
 
         next_available_actions = batch.next_available_actions
         next_unavailable_actions_mask = batch.next_unavailable_actions_mask
         self.assertIsNotNone(next_available_actions)
-        self.assertTrue(
-            torch.equal(
-                next_available_actions,
-                torch.tensor(
+        tt.assert_close(
+            next_available_actions,
+            torch.tensor(
+                [
                     [
-                        [
-                            [1, 0, 0, 0, 0],
-                            [0, 0, 0, 1, 0],
-                            [1, 0, 0, 0, 0],
-                            [1, 0, 0, 0, 0],
-                            [1, 0, 0, 0, 0],
-                        ]
+                        [1, 0, 0, 0, 0],
+                        [0, 0, 0, 1, 0],
+                        [1, 0, 0, 0, 0],
+                        [1, 0, 0, 0, 0],
+                        [1, 0, 0, 0, 0],
                     ]
-                ),
-            )
+                ]
+            ),
+            rtol=0.0,
+            atol=0.0,
         )
         self.assertIsNotNone(next_unavailable_actions_mask)
-        self.assertTrue(
-            torch.equal(
-                next_unavailable_actions_mask,
-                torch.tensor([[False, False, True, True, True]]),
-            )
+        tt.assert_close(
+            next_unavailable_actions_mask,
+            torch.tensor([[False, False, True, True, True]]),
+            rtol=0.0,
+            atol=0.0,
         )
diff --git a/test/unit/with_pytorch/test_ensembles.py b/test/unit/with_pytorch/test_ensembles.py
index a1b2dcd6..c4e64f74 100644
--- a/test/unit/with_pytorch/test_ensembles.py
+++ b/test/unit/with_pytorch/test_ensembles.py
@@ -9,8 +9,9 @@
 
 import unittest
 
-import numpy.testing as npt
 import torch
+
+import torch.testing as tt
 from pearl.neural_networks.common.epistemic_neural_networks import Ensemble
 from pearl.neural_networks.common.utils import ensemble_forward
 from torch import optim
@@ -49,10 +50,11 @@ def test_ensemble_values(self) -> None:
         for_loop_values = ensemble_forward(self.network.models, x, use_for_loop=True)
         vectorized_values = ensemble_forward(self.network.models, x, use_for_loop=False)
         self.assertEqual(for_loop_values.shape, vectorized_values.shape)
-        npt.assert_allclose(
-            for_loop_values.detach().numpy(),
-            vectorized_values.detach().numpy(),
+        tt.assert_close(
+            for_loop_values,
+            vectorized_values,
             atol=1e-5,
+            rtol=0.0,
         )
 
     def test_ensemble_optimization(self) -> None:
diff --git a/test/unit/with_pytorch/test_fifo_buffer.py b/test/unit/with_pytorch/test_fifo_buffer.py
index de221a68..046a0f02 100644
--- a/test/unit/with_pytorch/test_fifo_buffer.py
+++ b/test/unit/with_pytorch/test_fifo_buffer.py
@@ -10,6 +10,7 @@
 import unittest
 
 import torch
+import torch.testing as tt
 
 from pearl.replay_buffers.sequential_decision_making.fifo_on_policy_replay_buffer import (
     FIFOOnPolicyReplayBuffer,
@@ -65,32 +66,34 @@ def test_on_poliy_buffer_sarsa_match(self) -> None:
         )
         # expect S0 A0 R0 S1 A1 returned from sample
         batch = replay_buffer.sample(1)
-        self.assertTrue(
-            torch.equal(
-                batch.state,
-                self.states[0].view(1, -1),
-            )
+        tt.assert_close(
+            batch.state,
+            self.states[0].view(1, -1),
+            rtol=0.0,
+            atol=0.0,
         )
-        self.assertTrue(
-            torch.equal(
-                batch.action,
-                torch.tensor([self.actions[0]]),
-            )
+        tt.assert_close(
+            batch.action,
+            torch.tensor([self.actions[0]]),
+            rtol=0.0,
+            atol=0.0,
+        )
+        tt.assert_close(
+            batch.reward, torch.tensor([self.rewards[0]]), rtol=0.0, atol=0.0
         )
-        self.assertTrue(torch.equal(batch.reward, torch.tensor([self.rewards[0]])))
         assert (batch_next_state := batch.next_state) is not None
-        self.assertTrue(
-            torch.equal(
-                batch_next_state,
-                self.next_states[0].view(1, -1),
-            )
+        tt.assert_close(
+            batch_next_state,
+            self.next_states[0].view(1, -1),
+            rtol=0.0,
+            atol=0.0,
         )
         assert (batch_next_action := batch.next_action) is not None
-        self.assertTrue(
-            torch.equal(
-                batch_next_action,
-                torch.tensor([self.actions[1]]),
-            )
+        tt.assert_close(
+            batch_next_action,
+            torch.tensor([self.actions[1]]),
+            rtol=0.0,
+            atol=0.0,
         )
 
     def test_on_poliy_buffer_ternimal_push(self) -> None:
diff --git a/test/unit/with_pytorch/test_hindsight_experience_replay_buffer.py b/test/unit/with_pytorch/test_hindsight_experience_replay_buffer.py
index 502275e0..91ddec4b 100644
--- a/test/unit/with_pytorch/test_hindsight_experience_replay_buffer.py
+++ b/test/unit/with_pytorch/test_hindsight_experience_replay_buffer.py
@@ -11,6 +11,7 @@
 from typing import Dict
 
 import torch
+import torch.testing as tt
 
 from pearl.replay_buffers.sequential_decision_making.hindsight_experience_replay_buffer import (
     HindsightExperienceReplayBuffer,
@@ -89,6 +90,6 @@ def reward_fn(state: torch.Tensor, action: torch.Tensor) -> int:
         assert (batch_state := batch.state) is not None
         assert (batch_next_state := batch.next_state) is not None
         for i in range(2 * len(states) - 2):
-            self.assertTrue(
-                torch.all(torch.eq(batch_state[i][-2:], batch_next_state[i][-2:]))
+            tt.assert_close(
+                batch_state[i][-2:], batch_next_state[i][-2:], rtol=0.0, atol=0.0
             )
diff --git a/test/unit/with_pytorch/test_linear_bandits.py b/test/unit/with_pytorch/test_linear_bandits.py
index b02af327..e0655239 100644
--- a/test/unit/with_pytorch/test_linear_bandits.py
+++ b/test/unit/with_pytorch/test_linear_bandits.py
@@ -11,6 +11,7 @@
 import unittest
 
 import torch
+import torch.testing as tt
 from pearl.neural_networks.contextual_bandit.linear_regression import LinearRegression
 from pearl.policy_learners.contextual_bandits.linear_bandit import LinearBandit
 from pearl.policy_learners.exploration_modules.contextual_bandits.thompson_sampling_exploration import (  # noqa: E501
@@ -52,24 +53,20 @@ def setUp(self) -> None:
     def test_learn(self) -> None:
         batch = self.batch
         # a single input
-        self.assertTrue(
-            torch.allclose(
-                self.policy_learner.model(
-                    torch.cat([batch.state[0], batch.action[0]]).unsqueeze(0),
-                ),
-                batch.reward[0:1],
-                atol=1e-4,
-            )
+        tt.assert_close(
+            self.policy_learner.model(
+                torch.cat([batch.state[0], batch.action[0]]).unsqueeze(0),
+            ),
+            batch.reward[0:1],
+            atol=1e-3,
+            rtol=0.0,
         )
         # a batch input
-        self.assertTrue(
-            torch.allclose(
-                self.policy_learner.model(
-                    torch.cat([batch.state, batch.action], dim=1)
-                ),
-                batch.reward,
-                atol=1e-4,
-            )
+        tt.assert_close(
+            self.policy_learner.model(torch.cat([batch.state, batch.action], dim=1)),
+            batch.reward,
+            atol=1e-3,
+            rtol=0.0,
         )
 
     def test_linear_ucb_scores(self) -> None:
@@ -166,14 +163,10 @@ def test_linear_ucb_sigma(self) -> None:
         )
 
         # the 2nd arm's sigma is sqrt(10) times 1st arm's sigma
-        sigma_ratio = (sigma[-1] / sigma[0]).clone().detach()
-        self.assertTrue(
-            torch.allclose(
-                sigma_ratio,
-                torch.tensor(10.0**0.5),  # the 1st arm occured 10 times than 2nd arm
-                rtol=0.01,
-            )
-        )
+        sigma_ratio = (sigma[-1] / sigma[0]).detach().item()
+        self.assertAlmostEqual(
+            sigma_ratio, 10.0**0.5, delta=0.01
+        )  # the 1st arm occured 10 times than 2nd arm
 
     def test_linear_thompson_sampling_act(self) -> None:
         """
diff --git a/test/unit/with_pytorch/test_linear_regression.py b/test/unit/with_pytorch/test_linear_regression.py
index 34c3f3e1..4ae7a640 100644
--- a/test/unit/with_pytorch/test_linear_regression.py
+++ b/test/unit/with_pytorch/test_linear_regression.py
@@ -10,6 +10,7 @@
 import unittest
 
 import torch
+import torch.testing as tt
 
 from pearl.neural_networks.contextual_bandit.linear_regression import LinearRegression
 
@@ -20,19 +21,19 @@ def test_append_ones(self) -> None:
         x = torch.randn(10)
         expected_output = torch.cat([torch.ones((1,)), x], dim=0)
         output = LinearRegression.append_ones(x)
-        self.assertTrue(torch.allclose(expected_output, output))
+        tt.assert_close(expected_output, output)
 
         # 2D input
         x = torch.randn(10, 5)
         expected_output = torch.cat([torch.ones((10, 1)), x], dim=1)
         output = LinearRegression.append_ones(x)
-        self.assertTrue(torch.allclose(expected_output, output))
+        tt.assert_close(expected_output, output)
 
         # 3D input
         x = torch.randn(10, 5, 6)
         expected_output = torch.cat([torch.ones((10, 5, 1)), x], dim=2)
         output = LinearRegression.append_ones(x)
-        self.assertTrue(torch.allclose(expected_output, output))
+        tt.assert_close(expected_output, output)
 
         # make sure it's traceable
         _ = torch.fx.symbolic_trace(LinearRegression.append_ones)
diff --git a/test/unit/with_pytorch/test_neural_linear_bandits.py b/test/unit/with_pytorch/test_neural_linear_bandits.py
index d7e87516..412aeded 100644
--- a/test/unit/with_pytorch/test_neural_linear_bandits.py
+++ b/test/unit/with_pytorch/test_neural_linear_bandits.py
@@ -10,6 +10,7 @@
 import unittest
 
 import torch
+import torch.testing as tt
 from pearl.neural_networks.common.residual_wrapper import ResidualWrapper
 from pearl.policy_learners.contextual_bandits.neural_bandit import LOSS_TYPES
 from pearl.policy_learners.contextual_bandits.neural_linear_bandit import (
@@ -71,25 +72,25 @@ def test_state_dict(self):
         copy_policy_learner.load_state_dict(policy_learner.state_dict())
 
         # assert and check if they are the same
-        self.assertTrue(
-            torch.equal(
-                copy_policy_learner.model._linear_regression_layer._A,
-                policy_learner.model._linear_regression_layer._A,
-            )
+        tt.assert_close(
+            copy_policy_learner.model._linear_regression_layer._A,
+            policy_learner.model._linear_regression_layer._A,
+            rtol=0.0,
+            atol=0.0,
         )
 
-        self.assertTrue(
-            torch.equal(
-                copy_policy_learner.model._linear_regression_layer._b,
-                policy_learner.model._linear_regression_layer._b,
-            )
+        tt.assert_close(
+            copy_policy_learner.model._linear_regression_layer._b,
+            policy_learner.model._linear_regression_layer._b,
+            rtol=0.0,
+            atol=0.0,
         )
 
         for p1, p2 in zip(
             copy_policy_learner.model._nn_layers.parameters(),
             policy_learner.model._nn_layers.parameters(),
         ):
-            self.assertTrue(torch.equal(p1.to(p2.device), p2))
+            tt.assert_close(p1.to(p2.device), p2, rtol=0.0, atol=0.0)
 
     # currently test support mse, mae, cross_entropy
     # separate loss_types into inddividual test cases to make it easier to debug.
diff --git a/test/unit/with_pytorch/test_on_policy_replay_buffer.py b/test/unit/with_pytorch/test_on_policy_replay_buffer.py
index 22f4e369..cd4e44fe 100644
--- a/test/unit/with_pytorch/test_on_policy_replay_buffer.py
+++ b/test/unit/with_pytorch/test_on_policy_replay_buffer.py
@@ -10,6 +10,7 @@
 import unittest
 
 import torch
+import torch.testing as tt
 
 from pearl.replay_buffers.sequential_decision_making.on_policy_replay_buffer import (
     OnPolicyReplayBuffer,
@@ -55,15 +56,16 @@ def test_push_complete_trajectory(self) -> None:
 
         # validate terminal state indicators - 1 only for the last element
         terminated = batch.terminated[order]
-        self.assertTrue(
-            torch.equal(
-                terminated, torch.eye(self.trajectory_len)[self.trajectory_len - 1]
-            )
+        tt.assert_close(
+            terminated,
+            torch.eye(self.trajectory_len)[self.trajectory_len - 1].bool(),
+            rtol=0.0,
+            atol=0.0,
         )
 
         # validate actions
         actions = batch.action[order]
-        self.assertTrue(torch.equal(actions, torch.arange(self.action_size)))
+        tt.assert_close(actions, torch.arange(self.action_size), rtol=0.0, atol=0.0)
 
     def test_push_2_trajectories(self) -> None:
         replay_buffer = OnPolicyReplayBuffer(self.capacity)
@@ -108,29 +110,30 @@ def test_push_2_trajectories(self) -> None:
 
         # validate terminal state indicators - 1 only for the last element
         terminated = batch.terminated[order]
-        self.assertTrue(
-            torch.equal(
-                terminated[0 : self.trajectory_len],
-                torch.eye(self.trajectory_len)[self.trajectory_len - 1],
-            )
+        tt.assert_close(
+            terminated[0 : self.trajectory_len],
+            torch.eye(self.trajectory_len)[self.trajectory_len - 1].bool(),
+            rtol=0.0,
+            atol=0.0,
        )
-        self.assertTrue(
-            torch.equal(
-                terminated[self.trajectory_len :],
-                torch.eye(trajectory_len_2)[trajectory_len_2 - 1],
-            )
+        tt.assert_close(
+            terminated[self.trajectory_len :],
+            torch.eye(trajectory_len_2)[trajectory_len_2 - 1].bool(),
+            rtol=0.0,
+            atol=0.0,
         )
 
         # validate actions
         actions = batch.action[order]
-        self.assertTrue(
-            torch.equal(
-                actions[0 : self.trajectory_len], torch.arange(self.action_size)
-            )
+        tt.assert_close(
+            actions[0 : self.trajectory_len],
+            torch.arange(self.action_size),
+            rtol=0.0,
+            atol=0.0,
         )
-        self.assertTrue(
-            torch.equal(
-                actions[self.trajectory_len :],
-                torch.arange(self.action_size)[0:trajectory_len_2],
-            )
+        tt.assert_close(
+            actions[self.trajectory_len :],
+            torch.arange(self.action_size)[0:trajectory_len_2],
+            rtol=0.0,
+            atol=0.0,
         )