generated from amosproj/amos202Xss0Y-projname
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #240 from amosproj/feature/229-more-test-cases
Feature/229 more test cases
- Loading branch information
Showing
6 changed files
with
459 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -78,3 +78,6 @@ report.pdf | |
**/cache/* | ||
|
||
!.gitkeep | ||
|
||
# testing | ||
.coverage |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# SPDX-License-Identifier: MIT | ||
# SPDX-FileCopyrightText: 2024 Felix Zailskas <[email protected]> | ||
|
||
from collections.abc import Callable
from typing import Any

import pandas as pd
|
||
|
||
def mock_hash_check(
    self,
    lead_data: pd.Series,
    data_fill_function: Callable[..., Any],
    step_name: str,
    fields_tofill: list[str],
    *args,
    **kwargs,
) -> Any:
    """Call ``data_fill_function`` unconditionally and return its result.

    Meant to be patched over a ``hash_check`` method in tests (presumably
    ``LeadHashGenerator.hash_check`` -- confirm against the importing test
    modules), so that the fill function is always executed.

    Args:
        self: Instance the patched method is bound to; unused.
        lead_data: Lead row handed in by the caller; unused.
        data_fill_function: Callable that produces the data for the lead.
        step_name: Name of the calling step; unused.
        fields_tofill: Field names the step wants to fill; unused.
        *args: Positional arguments forwarded to ``data_fill_function``.
        **kwargs: Keyword arguments forwarded to ``data_fill_function``.

    Returns:
        Whatever ``data_fill_function(*args, **kwargs)`` returns.
    """
    # The unused parameters stay in the signature so the mock can be called
    # exactly like the method it replaces.
    return data_fill_function(*args, **kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
# SPDX-License-Identifier: MIT | ||
# SPDX-FileCopyrightText: 2024 Felix Zailskas <[email protected]> | ||
|
||
import unittest | ||
from unittest.mock import MagicMock, patch | ||
|
||
import pandas as pd | ||
|
||
from bdc.steps.analyze_emails import ( | ||
AnalyzeEmails, | ||
analyze_email_account, | ||
extract_custom_domain, | ||
) | ||
from bdc.steps.helpers.generate_hash_leads import LeadHashGenerator | ||
from tests import mock_hash_check | ||
|
||
|
||
class TestExtractCustomDomain(unittest.TestCase):
    """Compare ``extract_custom_domain`` output against two-element Series."""

    def _assert_extraction(self, email, expected_values):
        """Assert that extracting from ``email`` equals ``expected_values``."""
        outcome = extract_custom_domain(email)
        self.assertTrue(outcome.equals(pd.Series(expected_values)))

    def test_valid_email(self):
        self._assert_extraction("[email protected]", ["example.com", True])

    def test_invalid_email(self):
        self._assert_extraction("invalid_email", [None, False])

    def test_email_with_subdomain(self):
        self._assert_extraction("[email protected]", ["sub.example.com", True])

    def test_empty_email(self):
        self._assert_extraction("", [None, False])
|
||
|
||
class TestAnalyzeEmailAccount(unittest.TestCase):
    """Exercise ``analyze_email_account`` on hand-built lead dictionaries.

    Each test compares the returned value with a two-element boolean Series
    (presumably "first name in account" / "last name in account" -- confirm
    against ``analyze_email_account``).

    NOTE(review): several address literals below are the same placeholder
    text even though the expected results differ between tests; presumably
    the real fixtures used different local parts -- confirm and restore them
    before relying on these tests.
    """

    def _init_lead(self, email: str, email_valid: bool) -> dict:
        """Build a lead for "John Doe" with the given address and validity."""
        return {
            "First Name": "John",
            "Last Name": "Doe",
            "Email": email,
            "email_valid": email_valid,
        }

    def test_valid_email_account(self):
        lead = self._init_lead(email="[email protected]", email_valid=True)
        result = analyze_email_account(lead)
        expected = pd.Series([True, True])
        self.assertTrue(result.equals(expected))

    def test_invalid_email_account(self):
        lead = self._init_lead(email="invalid_email", email_valid=False)
        result = analyze_email_account(lead)
        expected = pd.Series([False, False])
        self.assertTrue(result.equals(expected))

    def test_missing_first_name(self):
        lead = self._init_lead(email="[email protected]", email_valid=True)
        result = analyze_email_account(lead)
        expected = pd.Series([True, False])
        self.assertTrue(result.equals(expected))

    def test_missing_last_name(self):
        lead = self._init_lead(email="[email protected]", email_valid=True)
        result = analyze_email_account(lead)
        expected = pd.Series([False, True])
        self.assertTrue(result.equals(expected))

    def test_missing_names(self):
        # Built by hand (not via _init_lead) so that the name keys are
        # absent from the lead altogether.
        lead = {"Email": "[email protected]", "email_valid": True}
        result = analyze_email_account(lead)
        expected = pd.Series([False, False])
        self.assertTrue(result.equals(expected))
|
||
|
||
class TestStepExecution(unittest.TestCase):
    """Run the ``AnalyzeEmails`` step on a three-row lead frame."""

    step: AnalyzeEmails

    def setUp(self):
        """Create a fresh step whose ``df`` holds three "John Doe" leads."""
        lead_data = {
            "First Name": ["John"] * 3,
            "Last Name": ["Doe"] * 3,
            "Email": [
                "[email protected]",
                "invalid_email",
                "[email protected]",
            ],
        }
        self.step = AnalyzeEmails(force_refresh=True)
        self.step.df = pd.DataFrame(lead_data)

    @patch.object(LeadHashGenerator, "hash_check", mock_hash_check)
    def test_run_method(self):
        """``run`` returns a DataFrame with the added e-mail columns."""
        result = self.step.run()

        # TestCase assertions instead of bare ``assert``: they are not
        # stripped under ``python -O`` and report the offending value.
        self.assertIsInstance(result, pd.DataFrame)

        columns = result.columns.to_list()
        for expected_column in (
            "First Name",
            "Last Name",
            "Email",
            "domain",
            "email_valid",
            "first_name_in_account",
            "last_name_in_account",
        ):
            self.assertIn(expected_column, columns)

        self.assertEqual(result["domain"].to_list(), ["john.com", None, None])
|
||
|
||
# Run the tests in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# SPDX-License-Identifier: MIT | ||
# SPDX-FileCopyrightText: 2024 Felix Zailskas <[email protected]> | ||
|
||
import hashlib | ||
import unittest | ||
|
||
import pandas as pd | ||
|
||
from bdc.steps.hash_generator import HashGenerator | ||
|
||
|
||
class TestStepExecution(unittest.TestCase):
    """Run the ``HashGenerator`` step on a single-row lead frame."""

    def setUp(self):
        """Create a fresh step whose ``df`` holds one sample lead."""
        self.lead_data = {
            "First Name": ["John"],
            "Last Name": ["Doe"],
            "Company / Account": ["ABC Corp"],
            "Phone": ["+4912345678"],
            "Email": ["[email protected]"],
        }
        self.step = HashGenerator(force_refresh=True)
        self.step.df = pd.DataFrame(self.lead_data)

    def test_hash_lead(self):
        """``run`` adds a ``lead_hash`` column holding the expected digest."""
        # Expected value: SHA-256 over first name, last name, company, phone
        # and e-mail concatenated in that order.
        expected_hash = hashlib.sha256(
            ("John" + "Doe" + "ABC Corp" + "+4912345678" + "[email protected]").encode()
        ).hexdigest()

        # Execute the step on the sample data.
        result = self.step.run()

        # TestCase assertions instead of bare ``assert``: they are not
        # stripped under ``python -O`` and report the offending value.
        self.assertIsInstance(result, pd.DataFrame)

        columns = result.columns.to_list()
        for expected_column in (
            "First Name",
            "Last Name",
            "Email",
            "Company / Account",
            "Phone",
            "lead_hash",
        ):
            self.assertIn(expected_column, columns)

        # The stored hash of the only row must match the manual digest.
        self.assertEqual(result.iloc[0]["lead_hash"], expected_hash)
|
||
|
||
# Run the tests in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
# SPDX-License-Identifier: MIT | ||
# SPDX-FileCopyrightText: 2024 Felix Zailskas <[email protected]> | ||
|
||
import unittest | ||
from unittest.mock import patch | ||
|
||
import pandas as pd | ||
|
||
from bdc.steps.helpers.generate_hash_leads import LeadHashGenerator | ||
from bdc.steps.preprocess_phonenumbers import PreprocessPhonenumbers | ||
from tests import mock_hash_check | ||
|
||
|
||
class TestStepExecution(unittest.TestCase):
    """Run the ``PreprocessPhonenumbers`` step on seven sample leads."""

    def setUp(self):
        """Create the step, its input frame and the per-column ground truth.

        Every ``*_gt`` list is ordered like the ``Phone`` column, i.e. entry
        ``i`` is the expectation for phone number ``i``.
        """
        self.lead_data = {
            "First Name": ["John"] * 7,
            "Last Name": ["Doe"] * 7,
            "Phone": [
                "4930183992170",
                "invalid_phone",
                "442087599036",
                "3197010281402",
                "436601359011",
                "33757056600",
                "495111233421",
            ],
        }
        self.step = PreprocessPhonenumbers(force_refresh=True)
        self.step.df = pd.DataFrame(self.lead_data)
        self.formatted_gt = [
            "+49 30 183992170",
            "",
            "+44 20 8759 9036",
            "+31 970 102 81402",
            "+43 660 1359011",
            "+33 7 57 05 66 00",
            "+49 511 1233421",
        ]
        self.country_gt = [
            "Germany",
            "",
            "United Kingdom",
            "Netherlands",
            "Austria",
            "France",
            "Germany",
        ]
        self.area_gt = [
            "Berlin",
            "",
            "London",
            "",
            "",
            "",
            "Hannover",
        ]
        self.valid_gt = [
            True,
            False,
            True,
            True,
            True,
            True,
            True,
        ]
        self.possible_gt = [
            True,
            False,
            True,
            True,
            True,
            True,
            True,
        ]

    @patch.object(LeadHashGenerator, "hash_check", mock_hash_check)
    def test_hash_lead(self):
        """``run`` adds the phone-number columns with the expected values.

        NOTE(review): the method name looks copied from the hash-generator
        tests; it is kept unchanged so existing test selectors keep working.
        """
        result = self.step.run()

        # TestCase assertions instead of bare ``assert``: they are not
        # stripped under ``python -O`` and report the offending value.
        self.assertIsInstance(result, pd.DataFrame)

        columns = result.columns.to_list()
        for expected_column in (
            "First Name",
            "Last Name",
            "Phone",
            "number_formatted",
            "number_country",
            "number_area",
            "number_valid",
            "number_possible",
        ):
            self.assertIn(expected_column, columns)

        expectations = {
            "number_formatted": self.formatted_gt,
            "number_country": self.country_gt,
            "number_area": self.area_gt,
            "number_valid": self.valid_gt,
            "number_possible": self.possible_gt,
        }
        # Compare whole lists rather than zipping element by element: zip()
        # stops at the shorter sequence, so missing rows would go unnoticed.
        for column, expected in expectations.items():
            with self.subTest(column=column):
                self.assertEqual(result[column].to_list(), expected)
|
||
|
||
# Run the tests in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
Oops, something went wrong.