Skip to content

Commit

Permalink
Added tests for this newly added function
Browse files Browse the repository at this point in the history
  • Loading branch information
ThanosTsiamis committed May 10, 2024
1 parent 8aea3f2 commit 7485a82
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 5 deletions.
15 changes: 14 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import unittest

import pandas as pd

from tests import d1_path
from valentine.data_sources.utils import get_encoding, get_delimiter, is_date
from valentine.data_sources.utils import get_encoding, get_delimiter, is_date, add_noise_to_df_column
from valentine.utils.utils import is_sorted, convert_data_type


Expand Down Expand Up @@ -30,3 +32,14 @@ def test_get_delimiter(self):
def test_is_date(self):
    """A timestamp string with microsecond precision must be accepted by ``is_date``."""
    timestamp = "2019-04-26 18:03:50.941332"
    recognised = is_date(timestamp)
    assert recognised

def test_add_noise_to_df_column(self):
    """Exercise ``add_noise_to_df_column`` on a float column and a string column.

    Three properties are checked for each column:

    * a noise level of 0.0 leaves the column untouched;
    * at any noise level every value keeps exactly the characters of the
      original value (noise only shuffles characters), which holds whether
      or not the random draw triggered any noise, so it can never flake;
    * at a noise level of 0.99999 the column actually changes.

    A fresh DataFrame is built for every call because the function assigns
    the noisy values back into the frame it receives; reusing one frame
    would let noise from an earlier call make later assertions pass
    vacuously.
    """
    original = {'a': [1.0, 2.0, 3.0], 'b': ['abcdefg', 'hijklmn', 'opqrst']}

    def fresh_df():
        # New frame per call so in-place column assignment cannot leak between checks.
        return pd.DataFrame(original)

    assert_df = fresh_df()

    for column in ('a', 'b'):
        expected = assert_df[column]

        # Zero noise: the random draw can never fall below 0.0, so nothing changes.
        assert add_noise_to_df_column(fresh_df(), column, 0.0)[column].equals(expected)

        # Deterministic invariant: values are either unchanged or a permutation
        # of the characters of their original string form.
        for noise_level in (0.5, 0.99999):
            noisy = add_noise_to_df_column(fresh_df(), column, noise_level)[column]
            assert len(noisy) == len(expected)
            for noisy_value, clean_value in zip(noisy, expected):
                assert sorted(str(noisy_value)) == sorted(str(clean_value))

        # Near-certain noise: all three per-row draws would have to land at or
        # above 0.99999 for the column to stay untouched (about 1e-15), and the
        # string column would additionally have to shuffle back to itself.
        # The original assertion at 0.5 was dropped because it failed whenever
        # no draw fell below 0.5, i.e. roughly one run in eight per column.
        noisy = add_noise_to_df_column(fresh_df(), column, 0.99999)[column]
        assert not noisy.equals(expected)
5 changes: 1 addition & 4 deletions valentine/data_sources/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,4 @@ def add_noise_to_df_column(df, column_name, noise_level):
for _ in range(df[column_name].shape[0]):
if np.random.rand() < noise_level:
df[column_name] = df[column_name].apply(lambda x: ''.join(np.random.permutation(list(str(x)))))
return df

# if __name__ == "__main__":
# add_noise_to_df_column(pd.DataFrame({'a': [1, 2, 3], 'b': ['abcdefg', 'hijklmn', 'opqrst']}), 'b', 0.99)
return df

0 comments on commit 7485a82

Please sign in to comment.