Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jason-cleaner unittest updates #162

Merged
merged 5 commits into from
Oct 27, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
246 changes: 240 additions & 6 deletions src/tester/test_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -948,10 +948,244 @@ def test_parser_end_to_end(self, county="hays", case_number='123456'):
test = True)

class CleanTestCase(unittest.TestCase):
def test_cleaner_hays(self):
now = datetime.now()
now_string = now.strftime("%H:%M:%S")
# Call the function being tested
# cleaner(counter="hays")
def setUp(self):
self.cleaner = cleaner.Cleaner() # Create Cleaner instance here to avoid repeating this in every test

@patch('os.makedirs')
@patch('os.path.exists', return_value=False)
def test_get_or_create_folder_path(self, mock_exists, mock_makedirs):
mock_exists.return_value = False
county = "hays"
folder_type = "case_json"
cleaner_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "cleaner"))
expected_path = os.path.join(cleaner_dir, "..", "..", "data", county, folder_type)

folder_path = self.cleaner.get_or_create_folder_path(county, folder_type)

mock_exists.assert_called_once_with(expected_path) # Check if os.path.exists was called
mock_makedirs.assert_called_once_with(expected_path) # Check if os.makedirs was called
self.assertEqual(folder_path, expected_path) # Check that the path is correct

# Test when folder already exists
mock_exists.return_value = True
folder_path = self.cleaner.get_or_create_folder_path(county, folder_type)
mock_makedirs.assert_called_once() # Should not be called again

def test_load_json_file(self):
# Test successful load
with patch("builtins.open", new_callable=mock_open, read_data='{"key": "value"}'):
result = self.cleaner.load_json_file("fake_path.json")
self.assertEqual(result, {"key": "value"})

# Test file not found
with patch("builtins.open", side_effect=FileNotFoundError):
result = self.cleaner.load_json_file("nonexistent.json")
self.assertEqual(result, {})

# Test invalid JSON
with patch("builtins.open", new_callable=mock_open, read_data='invalid json'):
result = self.cleaner.load_json_file("invalid.json")
self.assertEqual(result, {})

def test_load_and_map_charge_names(self):
# Test successful mapping
test_data = '[{"charge_name": "Charge1", "details": "Some details"}]'
with patch("builtins.open", new_callable=mock_open, read_data=test_data):
result = self.cleaner.load_and_map_charge_names("fake_path.json")
self.assertEqual(result, {"Charge1": {"charge_name": "Charge1", "details": "Some details"}})

# Test empty file
with patch("builtins.open", new_callable=mock_open, read_data='[]'):
with self.assertRaises(FileNotFoundError):
self.cleaner.load_and_map_charge_names("empty.json")

# Test file not found
with patch("builtins.open", side_effect=FileNotFoundError):
with self.assertRaises(FileNotFoundError):
self.cleaner.load_and_map_charge_names("nonexistent.json")

def test_hash_defense_attorney(self):
input_data = {
"party information": {
"defense attorney": "John Doe",
"defense attorney phone number": "555-1234"
}
}
result = self.cleaner.hash_defense_attorney(input_data)
self.assertIsInstance(result, str)
self.assertNotEqual(result, "John Doe:555-1234")

# Test consistency
result2 = self.cleaner.hash_defense_attorney(input_data)
self.assertEqual(result, result2)

# Test different input
input_data2 = {
"party information": {
"defense attorney": "Jane Doe",
"defense attorney phone number": "555-5678"
}
}
result3 = self.cleaner.hash_defense_attorney(input_data2)
self.assertNotEqual(result, result3)

# Test missing data
input_data3 = {"party information": {}}
result4 = self.cleaner.hash_defense_attorney(input_data3)
self.assertEqual(result4, "")

def test_redact_cause_number(self):
# Test case 1: Normal input and consistency
input_dict = {"code": "123-ABC-456"}
result1 = self.cleaner.redact_cause_number(input_dict)
result2 = self.cleaner.redact_cause_number(input_dict)

self.assertIsInstance(result1, str)
self.assertEqual(len(result1), 16) # xxHash produces a 16-character hexadecimal string
self.assertEqual(result1, result2) # Ensure consistent hashing

# Test case 2: Different input produces different hash
input_dict2 = {"code": "789-XYZ-012"}
result3 = self.cleaner.redact_cause_number(input_dict2)
self.assertNotEqual(result1, result3)

# Test case 3: Empty input
self.assertNotEqual(self.cleaner.redact_cause_number({"code": ""}), result1)

# Test case 4: Missing 'code' key
with self.assertRaises(KeyError):
self.cleaner.redact_cause_number({})

def test_process_charges(self):
charges = [
{"level": "Misdemeanor", "charges": "Charge1", "statute": "123", "date": "12/01/2023"},
{"level": "Felony", "charges": "Charge2", "statute": "456", "date": "11/15/2023"},
]
charge_mapping = {
"Charge1": {"mapped_field": "mapped_value1"},
"Charge2": {"mapped_field": "mapped_value2"}
}

processed_charges, earliest_date = self.cleaner.process_charges(charges, charge_mapping)

self.assertEqual(len(processed_charges), 2)
self.assertEqual(processed_charges[0]['charge_date'], "2023-12-01")
self.assertEqual(processed_charges[1]['charge_date'], "2023-11-15")
self.assertEqual(earliest_date, "2023-11-15")

# Test invalid date
charges_invalid_date = [{"level": "Misdemeanor", "charges": "Charge1", "statute": "123", "date": "invalid"}]
processed_charges, earliest_date = self.cleaner.process_charges(charges_invalid_date, charge_mapping)
self.assertEqual(len(processed_charges), 0)
self.assertEqual(earliest_date, "")

def test_contains_good_motion(self):
self.assertTrue(self.cleaner.contains_good_motion("Motion To Suppress", "Event: Motion To Suppress"))
self.assertTrue(self.cleaner.contains_good_motion("Motion To Suppress", ["Other", "Motion To Suppress"]))
self.assertFalse(self.cleaner.contains_good_motion("Motion To Suppress", "Other Motion"))
self.assertFalse(self.cleaner.contains_good_motion("Motion To Suppress", ["Other1", "Other2"]))

def test_find_good_motions(self):
events = [
"Motion To Suppress",
"Motion to Reduce Bond",
"Other Event",
"Motion For Speedy Trial"
]

result = self.cleaner.find_good_motions(events, cleaner.GOOD_MOTIONS)
self.assertEqual(len(result), 3)
self.assertEqual(result, ["Motion To Suppress", "Motion to Reduce Bond", "Motion For Speedy Trial"])

# Test with no matching motions
events_no_match = ["Other1", "Other2"]
result_no_match = self.cleaner.find_good_motions(events_no_match, cleaner.GOOD_MOTIONS)
self.assertEqual(result_no_match, [])

@patch("src.cleaner.Cleaner.load_json_file")
@patch("src.cleaner.Cleaner.write_json_output")
@patch("src.cleaner.Cleaner.load_and_map_charge_names")
def test_process_single_case(self, mock_load_map, mock_write, mock_load):
mock_load.return_value = {
"code": "123",
"county": "test_county",
"party information": {
"defense attorney": "John Doe",
"defense attorney phone number": "555-1234",
"appointed or retained": "appointed"
},
"charge information": [
{"level": "Misdemeanor", "charges": "Charge1", "statute": "123", "date": "12/01/2023"}
],
"other events and hearings": ["Motion To Suppress"],
"html_hash": "test_hash"
}
mock_load_map.return_value = {"Charge1": {"mapped_field": "mapped_value"}}

county = "test_county"
folder_path = "case_json_folder"
case_file = "case1.json"

self.cleaner.process_single_case(county, folder_path, case_file)

mock_load.assert_called_once()
mock_write.assert_called_once()

# Check that the output contains expected fields
output_data = mock_write.call_args[0][1]
self.assertTrue("case_number" in output_data)
self.assertTrue("charges" in output_data)
self.assertTrue("motions" in output_data)
self.assertTrue("defense_attorney" in output_data)
self.assertTrue("county" in output_data)
self.assertTrue("html_hash" in output_data)
self.assertTrue("attorney_type" in output_data)
self.assertTrue("earliest_charge_date" in output_data)
self.assertTrue("has_evidence_of_representation" in output_data)
self.assertTrue("parsing_date" in output_data)

@patch("os.listdir", return_value=["case1.json", "case2.json"])
@patch("src.cleaner.Cleaner.get_or_create_folder_path")
@patch("src.cleaner.Cleaner.process_single_case")
def test_process_json_files(self, mock_process, mock_get_folder, mock_listdir):
county = "test_county"
folder_path = "case_json_folder"
mock_get_folder.return_value = "cleaned_folder_path"

self.cleaner.process_json_files(county, folder_path)

mock_get_folder.assert_called_once_with(county, "case_json_cleaned")
self.assertEqual(mock_process.call_count, 2)
mock_process.assert_any_call(folder_path, "case1.json", "cleaned_folder_path")
mock_process.assert_any_call(folder_path, "case2.json", "cleaned_folder_path")

@patch("json.dump")
@patch("builtins.open", new_callable=mock_open)
def test_write_json_output(self, mock_file, mock_json_dump):
file_path = "test_output.json"
data = {"key": "value"}
self.cleaner.write_json_output(file_path, data)

mock_file.assert_called_once_with(file_path, "w")
mock_json_dump.assert_called_once_with(data, mock_file())

@patch.object(cleaner.Cleaner, 'get_or_create_folder_path')
@patch.object(cleaner.Cleaner, 'process_json_files')
def test_clean(self, mock_process_json_files, mock_get_folder):
mock_get_folder.return_value = "mock_path"
county = "hays"

with self.assertLogs(level='INFO') as log:
self.cleaner.clean(county)

self.assertTrue(f"INFO:root:Processing data for county: {county}" in log.output)
self.assertTrue(f"INFO:root:Completed processing for county: {county}" in log.output)

mock_get_folder.assert_called_once_with(county, "case_json")
mock_process_json_files.assert_called_once_with(county, "mock_path")

# Need to finish coding this.
# Test exception handling
mock_process_json_files.side_effect = Exception("Test error")
with self.assertLogs(level='ERROR') as log:
self.cleaner.clean(county)
self.assertIn(f"ERROR:root:Error during cleaning process for county: {county}. Error: Test error", log.output)
Loading