Merge pull request #240 from amosproj/feature/229-more-test-cases

Feature/229 more test cases
amosproj · Feb 6, 2024 · 55d71fe · 55d71fe
2 parents d2fb7ec + a38dc94
commit 55d71fe
Show file tree

Hide file tree

Showing 6 changed files with 459 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -78,3 +78,6 @@ report.pdf
 **/cache/*
 
 !.gitkeep
+
+# testing: coverage.py data file
+.coverage
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2024 Felix Zailskas <[email protected]>
+
+from collections.abc import Callable
+
+import pandas as pd
+
+
+def mock_hash_check(
+    self,
+    lead_data: pd.Series,
+    data_fill_function: Callable,
+    step_name: str,
+    fields_tofill: list[str],
+    *args,
+    **kwargs,
+):
+    """Stand-in for ``LeadHashGenerator.hash_check`` used by the step tests.
+
+    The step test modules install this function over the real method with
+    ``patch.object``. It ignores the lead data, step name and field list
+    and simply runs the fill function.
+
+    Args:
+        self: Object the patched method is called on (unused).
+        lead_data: Lead data passed by the caller (unused).
+        data_fill_function: Callable that is invoked to produce the result.
+        step_name: Step name passed by the caller (unused).
+        fields_tofill: Field names passed by the caller (unused).
+        *args: Positional arguments forwarded to ``data_fill_function``.
+        **kwargs: Keyword arguments forwarded to ``data_fill_function``.
+
+    Returns:
+        Whatever ``data_fill_function(*args, **kwargs)`` returns.
+    """
+    return data_fill_function(*args, **kwargs)
diff --git a/tests/steps/test_analyze_emails.py b/tests/steps/test_analyze_emails.py
@@ -0,0 +1,123 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2024 Felix Zailskas <[email protected]>
+
+import unittest
+from unittest.mock import MagicMock, patch
+
+import pandas as pd
+
+from bdc.steps.analyze_emails import (
+    AnalyzeEmails,
+    analyze_email_account,
+    extract_custom_domain,
+)
+from bdc.steps.helpers.generate_hash_leads import LeadHashGenerator
+from tests import mock_hash_check
+
+
+class TestExtractCustomDomain(unittest.TestCase):
+    """Checks the ``pd.Series`` returned by ``extract_custom_domain``."""
+
+    def _assert_extraction(self, email, expected_values):
+        """Extract from ``email`` and compare with ``pd.Series(expected_values)``."""
+        actual = extract_custom_domain(email)
+        wanted = pd.Series(expected_values)
+        self.assertTrue(actual.equals(wanted))
+
+    def test_valid_email(self):
+        # Expect the domain part together with a True flag.
+        self._assert_extraction("[email protected]", ["example.com", True])
+
+    def test_invalid_email(self):
+        # Expect no domain and a False flag.
+        self._assert_extraction("invalid_email", [None, False])
+
+    def test_email_with_subdomain(self):
+        # Expect the sub-domain to be kept in the returned domain.
+        self._assert_extraction("[email protected]", ["sub.example.com", True])
+
+    def test_empty_email(self):
+        # An empty string is expected to behave like an invalid address.
+        self._assert_extraction("", [None, False])
+
+
+class TestAnalyzeEmailAccount(unittest.TestCase):
+    """Checks the two-flag ``pd.Series`` returned by ``analyze_email_account``.
+
+    NOTE(review): the two flags are presumably (first name in account, last
+    name in account) - confirm against ``analyze_email_account``.
+    """
+
+    def _init_lead(self, email: str, email_valid: bool) -> dict:
+        """Return a lead dict for "John Doe" with the given e-mail fields."""
+        return {
+            "First Name": "John",
+            "Last Name": "Doe",
+            "Email": email,
+            "email_valid": email_valid,
+        }
+
+    def test_valid_email_account(self):
+        lead = self._init_lead(email="[email protected]", email_valid=True)
+        result = analyze_email_account(lead)
+        expected = pd.Series([True, True])
+        self.assertTrue(result.equals(expected))
+
+    def test_invalid_email_account(self):
+        lead = self._init_lead(email="invalid_email", email_valid=False)
+        result = analyze_email_account(lead)
+        expected = pd.Series([False, False])
+        self.assertTrue(result.equals(expected))
+
+    def test_missing_first_name(self):
+        lead = self._init_lead(email="[email protected]", email_valid=True)
+        result = analyze_email_account(lead)
+        expected = pd.Series([True, False])
+        self.assertTrue(result.equals(expected))
+
+    def test_missing_last_name(self):
+        lead = self._init_lead(email="[email protected]", email_valid=True)
+        result = analyze_email_account(lead)
+        expected = pd.Series([False, True])
+        self.assertTrue(result.equals(expected))
+
+    def test_missing_names(self):
+        # Built by hand instead of via _init_lead so that the name keys are
+        # absent from the lead altogether.
+        lead = {"Email": "[email protected]", "email_valid": True}
+        result = analyze_email_account(lead)
+        expected = pd.Series([False, False])
+        self.assertTrue(result.equals(expected))
+
+
+class TestStepExecution(unittest.TestCase):
+    """Runs the ``AnalyzeEmails`` step on a small in-memory lead table."""
+
+    step: AnalyzeEmails
+
+    def setUp(self):
+        # Three leads that share one name; only the e-mail entry differs.
+        lead_data = {
+            "First Name": ["John"] * 3,
+            "Last Name": ["Doe"] * 3,
+            "Email": [
+                "[email protected]",
+                "invalid_email",
+                "[email protected]",
+            ],
+        }
+        self.step = AnalyzeEmails(force_refresh=True)
+        self.step.df = pd.DataFrame(lead_data)
+
+    @patch.object(LeadHashGenerator, "hash_check", mock_hash_check)
+    def test_run_method(self):
+        """``run`` returns a DataFrame with the input and derived columns."""
+        result = self.step.run()
+
+        # unittest assertions are used instead of bare ``assert`` so the
+        # checks survive ``python -O`` and report the offending value.
+        self.assertIsInstance(result, pd.DataFrame)
+        columns = result.columns.to_list()
+        for col in (
+            "First Name",
+            "Last Name",
+            "Email",
+            "domain",
+            "email_valid",
+            "first_name_in_account",
+            "last_name_in_account",
+        ):
+            self.assertIn(col, columns)
+        self.assertEqual(result["domain"].to_list(), ["john.com", None, None])
+
+
+# Run the tests in this module when the file is executed directly.
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/steps/test_hash_generator.py b/tests/steps/test_hash_generator.py
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2024 Felix Zailskas <[email protected]>
+
+import hashlib
+import unittest
+
+import pandas as pd
+
+from bdc.steps.hash_generator import HashGenerator
+
+
+class TestStepExecution(unittest.TestCase):
+    """Runs the ``HashGenerator`` step on a single in-memory lead."""
+
+    def setUp(self):
+        self.lead_data = {
+            "First Name": ["John"],
+            "Last Name": ["Doe"],
+            "Company / Account": ["ABC Corp"],
+            "Phone": ["+4912345678"],
+            "Email": ["[email protected]"],
+        }
+        self.step = HashGenerator(force_refresh=True)
+        self.step.df = pd.DataFrame(self.lead_data)
+
+    def test_hash_lead(self):
+        """``run`` adds a ``lead_hash`` column matching a hand-computed hash."""
+        # Expected value: SHA-256 over the lead's fields concatenated in the
+        # order first name, last name, company, phone, e-mail.
+        expected_hash = hashlib.sha256(
+            ("John" + "Doe" + "ABC Corp" + "+4912345678" + "[email protected]").encode()
+        ).hexdigest()
+
+        # Execute the step on the sample data.
+        result = self.step.run()
+
+        # unittest assertions are used instead of bare ``assert`` so the
+        # checks survive ``python -O`` and report the offending value.
+        self.assertIsInstance(result, pd.DataFrame)
+        columns = result.columns.to_list()
+        for col in (
+            "First Name",
+            "Last Name",
+            "Email",
+            "Company / Account",
+            "Phone",
+            "lead_hash",
+        ):
+            self.assertIn(col, columns)
+
+        # The stored hash must match the manually computed one.
+        self.assertEqual(result.iloc[0]["lead_hash"], expected_hash)
+
+
+# Run the tests in this module when the file is executed directly.
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/steps/test_preprocess_phonenumbers.py b/tests/steps/test_preprocess_phonenumbers.py
@@ -0,0 +1,114 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2024 Felix Zailskas <[email protected]>
+
+import unittest
+from unittest.mock import patch
+
+import pandas as pd
+
+from bdc.steps.helpers.generate_hash_leads import LeadHashGenerator
+from bdc.steps.preprocess_phonenumbers import PreprocessPhonenumbers
+from tests import mock_hash_check
+
+
+class TestStepExecution(unittest.TestCase):
+    """Runs the ``PreprocessPhonenumbers`` step on seven sample leads.
+
+    Each ``*_gt`` list holds the expected value of one output column, in the
+    same row order as the ``Phone`` entries built in ``setUp``.
+    """
+
+    def setUp(self):
+        self.lead_data = {
+            "First Name": ["John"] * 7,
+            "Last Name": ["Doe"] * 7,
+            "Phone": [
+                "4930183992170",
+                "invalid_phone",
+                "442087599036",
+                "3197010281402",
+                "436601359011",
+                "33757056600",
+                "495111233421",
+            ],
+        }
+        self.step = PreprocessPhonenumbers(force_refresh=True)
+        self.step.df = pd.DataFrame(self.lead_data)
+        # Expected "number_formatted" values; "" for the unparsable entry.
+        self.formatted_gt = [
+            "+49 30 183992170",
+            "",
+            "+44 20 8759 9036",
+            "+31 970 102 81402",
+            "+43 660 1359011",
+            "+33 7 57 05 66 00",
+            "+49 511 1233421",
+        ]
+        # Expected "number_country" values.
+        self.country_gt = [
+            "Germany",
+            "",
+            "United Kingdom",
+            "Netherlands",
+            "Austria",
+            "France",
+            "Germany",
+        ]
+        # Expected "number_area" values; "" where no area is expected.
+        self.area_gt = [
+            "Berlin",
+            "",
+            "London",
+            "",
+            "",
+            "",
+            "Hannover",
+        ]
+        # Expected "number_valid" flags.
+        self.valid_gt = [
+            True,
+            False,
+            True,
+            True,
+            True,
+            True,
+            True,
+        ]
+        # Expected "number_possible" flags.
+        self.possible_gt = [
+            True,
+            False,
+            True,
+            True,
+            True,
+            True,
+            True,
+        ]
+
+    # NOTE(review): the method name looks copied from the hash generator
+    # test; it is kept so existing test selectors keep working.
+    @patch.object(LeadHashGenerator, "hash_check", mock_hash_check)
+    def test_hash_lead(self):
+        """``run`` returns a DataFrame whose phone columns match the ``*_gt`` lists."""
+        result = self.step.run()
+
+        # unittest assertions are used instead of bare ``assert`` so the
+        # checks survive ``python -O`` and report the offending value.
+        self.assertIsInstance(result, pd.DataFrame)
+        columns = result.columns.to_list()
+        for col in (
+            "First Name",
+            "Last Name",
+            "Phone",
+            "number_formatted",
+            "number_country",
+            "number_area",
+            "number_valid",
+            "number_possible",
+        ):
+            self.assertIn(col, columns)
+
+        expected_by_column = {
+            "number_formatted": self.formatted_gt,
+            "number_country": self.country_gt,
+            "number_area": self.area_gt,
+            "number_valid": self.valid_gt,
+            "number_possible": self.possible_gt,
+        }
+        # Compare whole lists rather than zipping element by element: zip
+        # stops at the shorter input, so a result with missing rows would
+        # otherwise pass unnoticed.
+        for column, expected in expected_by_column.items():
+            with self.subTest(column=column):
+                self.assertEqual(result[column].to_list(), expected)
+
+
+# Run the tests in this module when the file is executed directly.
+if __name__ == "__main__":
+    unittest.main()