Skip to content

Commit

Permalink
Merge pull request #240 from amosproj/feature/229-more-test-cases
Browse files Browse the repository at this point in the history
Feature/229 more test cases
  • Loading branch information
felix-zailskas authored Feb 6, 2024
2 parents d2fb7ec + a38dc94 commit 55d71fe
Show file tree
Hide file tree
Showing 6 changed files with 459 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,6 @@ report.pdf
**/cache/*

!.gitkeep

# testing
.coverage
16 changes: 16 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2024 Felix Zailskas <[email protected]>

from collections.abc import Callable
from typing import Any

import pandas as pd


def mock_hash_check(
    self,
    lead_data: pd.Series,
    data_fill_function: Callable[..., Any],
    step_name: str,
    fields_tofill: list[str],
    *args: Any,
    **kwargs: Any,
) -> Any:
    """Test double for a ``hash_check`` method: call the fill function directly.

    The signature carries ``self`` because the function is meant to be
    installed as a method (the step tests patch it over
    ``LeadHashGenerator.hash_check`` with ``patch.object``).

    Args:
        self: Instance the function is bound to once patched in; unused.
        lead_data: Lead row handed over by the caller; unused.
        data_fill_function: Callable that produces the data for the lead.
        step_name: Name of the calling step; unused.
        fields_tofill: Names of the fields the step fills; unused.
        *args: Positional arguments forwarded to ``data_fill_function``.
        **kwargs: Keyword arguments forwarded to ``data_fill_function``.

    Returns:
        Whatever ``data_fill_function(*args, **kwargs)`` returns.
    """
    # Only the fill function and its arguments matter here; every other
    # parameter exists purely to mirror the signature being replaced.
    return data_fill_function(*args, **kwargs)
123 changes: 123 additions & 0 deletions tests/steps/test_analyze_emails.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2024 Felix Zailskas <[email protected]>

import unittest
from unittest.mock import MagicMock, patch

import pandas as pd

from bdc.steps.analyze_emails import (
AnalyzeEmails,
analyze_email_account,
extract_custom_domain,
)
from bdc.steps.helpers.generate_hash_leads import LeadHashGenerator
from tests import mock_hash_check


class TestExtractCustomDomain(unittest.TestCase):
    """Unit tests for ``extract_custom_domain``.

    Every case passes one e-mail string and compares the returned Series with
    a two-element expected Series (presumably ``[domain, email_valid]`` --
    confirm against the implementation).
    """

    # NOTE(review): the "[email protected]" literals below look like an
    # e-mail obfuscation placeholder rather than the addresses the tests were
    # written with -- confirm against the repository before relying on them.

    def _assert_series_equal(self, result, expected):
        """Assert ``result.equals(expected)`` and show both Series on failure."""
        # A bare assertTrue would only report "False is not true".
        self.assertTrue(
            result.equals(expected),
            msg=f"expected {expected.to_list()!r}, got {result.to_list()!r}",
        )

    def test_valid_email(self):
        email = "[email protected]"
        result = extract_custom_domain(email)
        self._assert_series_equal(result, pd.Series(["example.com", True]))

    def test_invalid_email(self):
        email = "invalid_email"
        result = extract_custom_domain(email)
        self._assert_series_equal(result, pd.Series([None, False]))

    def test_email_with_subdomain(self):
        email = "[email protected]"
        result = extract_custom_domain(email)
        self._assert_series_equal(result, pd.Series(["sub.example.com", True]))

    def test_empty_email(self):
        email = ""
        result = extract_custom_domain(email)
        self._assert_series_equal(result, pd.Series([None, False]))


class TestAnalyzeEmailAccount(unittest.TestCase):
    """Unit tests for ``analyze_email_account``.

    Each case builds a lead mapping and compares the returned Series with a
    two-element boolean Series (presumably ``[first_name_in_account,
    last_name_in_account]`` -- confirm against the implementation).
    """

    # NOTE(review): the "[email protected]" literals below look like an
    # e-mail obfuscation placeholder rather than the addresses the tests were
    # written with -- confirm against the repository before relying on them.

    def _init_lead(self, email: str, email_valid: bool):
        """Return a lead dict for "John Doe" with the given e-mail fields."""
        return {
            "First Name": "John",
            "Last Name": "Doe",
            "Email": email,
            "email_valid": email_valid,
        }

    def _assert_series_equal(self, result, expected):
        """Assert ``result.equals(expected)`` and show both Series on failure."""
        # A bare assertTrue would only report "False is not true".
        self.assertTrue(
            result.equals(expected),
            msg=f"expected {expected.to_list()!r}, got {result.to_list()!r}",
        )

    def test_valid_email_account(self):
        lead = self._init_lead(email="[email protected]", email_valid=True)
        result = analyze_email_account(lead)
        self._assert_series_equal(result, pd.Series([True, True]))

    def test_invalid_email_account(self):
        lead = self._init_lead(email="invalid_email", email_valid=False)
        result = analyze_email_account(lead)
        self._assert_series_equal(result, pd.Series([False, False]))

    def test_missing_first_name(self):
        lead = self._init_lead(email="[email protected]", email_valid=True)
        result = analyze_email_account(lead)
        self._assert_series_equal(result, pd.Series([True, False]))

    def test_missing_last_name(self):
        lead = self._init_lead(email="[email protected]", email_valid=True)
        result = analyze_email_account(lead)
        self._assert_series_equal(result, pd.Series([False, True]))

    def test_missing_names(self):
        # Built by hand because this lead must have no name keys at all.
        lead = {"Email": "[email protected]", "email_valid": True}
        result = analyze_email_account(lead)
        self._assert_series_equal(result, pd.Series([False, False]))


class TestStepExecution(unittest.TestCase):
    """Runs the ``AnalyzeEmails`` step on a three-row lead DataFrame."""

    step: AnalyzeEmails

    # NOTE(review): the "[email protected]" literals below look like an
    # e-mail obfuscation placeholder rather than the addresses the test was
    # written with -- confirm against the repository before relying on them.

    def setUp(self):
        """Create the step and attach three "John Doe" leads to it."""
        lead_data = {
            "First Name": ["John"] * 3,
            "Last Name": ["Doe"] * 3,
            "Email": [
                "[email protected]",
                "invalid_email",
                "[email protected]",
            ],
        }
        self.step = AnalyzeEmails(force_refresh=True)
        self.step.df = pd.DataFrame(lead_data)

    @patch.object(LeadHashGenerator, "hash_check", mock_hash_check)
    def test_run_method(self):
        """``run()`` returns a DataFrame with the input and derived columns."""
        result = self.step.run()

        # unittest assertions survive ``python -O`` and explain failures,
        # unlike bare ``assert`` statements.
        self.assertIsInstance(result, pd.DataFrame)

        columns = result.columns.to_list()
        required = [
            "First Name",
            "Last Name",
            "Email",
            "domain",
            "email_valid",
            "first_name_in_account",
            "last_name_in_account",
        ]
        # Collect every missing column so a failure names all of them at once.
        missing = [col for col in required if col not in columns]
        self.assertEqual(missing, [], msg=f"missing columns: {missing}")

        self.assertListEqual(result["domain"].to_list(), ["john.com", None, None])


# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
51 changes: 51 additions & 0 deletions tests/steps/test_hash_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2024 Felix Zailskas <[email protected]>

import hashlib
import unittest

import pandas as pd

from bdc.steps.hash_generator import HashGenerator


class TestStepExecution(unittest.TestCase):
    """Runs the ``HashGenerator`` step on a single-lead DataFrame."""

    def setUp(self):
        """Create the step and attach one lead to it."""
        self.lead_data = {
            "First Name": ["John"],
            "Last Name": ["Doe"],
            "Company / Account": ["ABC Corp"],
            "Phone": ["+4912345678"],
            "Email": ["[email protected]"],
        }
        self.step = HashGenerator(force_refresh=True)
        self.step.df = pd.DataFrame(self.lead_data)

    def test_hash_lead(self):
        """``run()`` adds a ``lead_hash`` column holding the expected digest."""
        # Expected value: SHA-256 hex digest of the lead's first name, last
        # name, company, phone and e-mail concatenated in that order.
        expected_hash = hashlib.sha256(
            ("John" + "Doe" + "ABC Corp" + "+4912345678" + "[email protected]").encode()
        ).hexdigest()

        # Execute the step on the sample data.
        result = self.step.run()

        # unittest assertions survive ``python -O`` and explain failures,
        # unlike bare ``assert`` statements.
        self.assertIsInstance(result, pd.DataFrame)

        columns = result.columns.to_list()
        required = [
            "First Name",
            "Last Name",
            "Email",
            "Company / Account",
            "Phone",
            "lead_hash",
        ]
        # Collect every missing column so a failure names all of them at once.
        missing = [col for col in required if col not in columns]
        self.assertEqual(missing, [], msg=f"missing columns: {missing}")

        # The hash stored for the single lead must match the manual digest.
        self.assertEqual(result.iloc[0]["lead_hash"], expected_hash)


# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
114 changes: 114 additions & 0 deletions tests/steps/test_preprocess_phonenumbers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2024 Felix Zailskas <[email protected]>

import unittest
from unittest.mock import patch

import pandas as pd

from bdc.steps.helpers.generate_hash_leads import LeadHashGenerator
from bdc.steps.preprocess_phonenumbers import PreprocessPhonenumbers
from tests import mock_hash_check


class TestStepExecution(unittest.TestCase):
    """Runs the ``PreprocessPhonenumbers`` step on seven sample leads.

    ``setUp`` stores the expected per-row values ("ground truth", ``*_gt``)
    for each column the test checks; the second row holds an unparsable
    phone string and expects empty / ``False`` values.
    """

    def setUp(self):
        """Create the step, attach seven leads and define the expected output."""
        self.lead_data = {
            "First Name": ["John"] * 7,
            "Last Name": ["Doe"] * 7,
            "Phone": [
                "4930183992170",
                "invalid_phone",
                "442087599036",
                "3197010281402",
                "436601359011",
                "33757056600",
                "495111233421",
            ],
        }
        self.step = PreprocessPhonenumbers(force_refresh=True)
        self.step.df = pd.DataFrame(self.lead_data)
        self.formatted_gt = [
            "+49 30 183992170",
            "",
            "+44 20 8759 9036",
            "+31 970 102 81402",
            "+43 660 1359011",
            "+33 7 57 05 66 00",
            "+49 511 1233421",
        ]
        self.country_gt = [
            "Germany",
            "",
            "United Kingdom",
            "Netherlands",
            "Austria",
            "France",
            "Germany",
        ]
        self.area_gt = [
            "Berlin",
            "",
            "London",
            "",
            "",
            "",
            "Hannover",
        ]
        self.valid_gt = [
            True,
            False,
            True,
            True,
            True,
            True,
            True,
        ]
        self.possible_gt = [
            True,
            False,
            True,
            True,
            True,
            True,
            True,
        ]

    # NOTE(review): the method name mentions hashing, but the body exercises
    # the phone-number step's run(); the name is kept so existing test
    # selections keep working.
    @patch.object(LeadHashGenerator, "hash_check", mock_hash_check)
    def test_hash_lead(self):
        """``run()`` returns the expected phone-number columns and values."""
        result = self.step.run()

        # unittest assertions survive ``python -O`` and explain failures,
        # unlike bare ``assert`` statements.
        self.assertIsInstance(result, pd.DataFrame)

        columns = result.columns.to_list()
        required = [
            "First Name",
            "Last Name",
            "Phone",
            "number_formatted",
            "number_country",
            "number_area",
            "number_valid",
            "number_possible",
        ]
        # Collect every missing column so a failure names all of them at once.
        missing = [col for col in required if col not in columns]
        self.assertEqual(missing, [], msg=f"missing columns: {missing}")

        expected_by_column = {
            "number_formatted": self.formatted_gt,
            "number_country": self.country_gt,
            "number_area": self.area_gt,
            "number_valid": self.valid_gt,
            "number_possible": self.possible_gt,
        }
        # Compare whole lists: a zip()-based loop stops at the shorter input,
        # so a result with too few rows would pass unnoticed.
        for column, expected in expected_by_column.items():
            with self.subTest(column=column):
                self.assertListEqual(result[column].to_list(), expected)


# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading

0 comments on commit 55d71fe

Please sign in to comment.