Skip to content

Commit

Permalink
Handle hidden Non-breaking space
Browse files Browse the repository at this point in the history
Users were copying hidden non-breaking spaces into data.csv.
Prior to these changes, unexpected behaviour occurred. These changes
validate the file prior to processing and ensure non-breaking spaces are
replaced with normal spaces.
  • Loading branch information
jamesgreen-moj committed Dec 28, 2023
1 parent 7982f32 commit c9f9b27
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 8 deletions.
16 changes: 12 additions & 4 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,21 @@
from geocode import geocode, find_timezone, find_country_code

def clean_csv_rows_by_removing_nbsp(unformatted_csv_rows):
    """Return copies of the CSV rows with non-breaking spaces removed from keys.

    Every '\\xa0' (non-breaking space) in a row's keys is replaced with a
    regular space. Values are copied across unchanged.

    Args:
        unformatted_csv_rows: Iterable of dicts, one per CSV row.

    Returns:
        A new list of new dicts. Neither the input list nor the dicts inside
        it are modified.
    """
    cleaned_rows = []

    for data_dict in unformatted_csv_rows:
        # Build a fresh dict rather than popping/re-inserting keys on
        # data_dict: changing a dict's keys while iterating over it is
        # unsafe and would also mutate the caller's data.
        cleaned_dict = {}

        for key, value in data_dict.items():
            # Only strings can contain '\xa0'. Other keys are passed through
            # as-is, e.g. the None key csv.DictReader uses for surplus
            # fields (assuming the rows come from DictReader - confirm
            # against convert_csv_to_json).
            if isinstance(key, str):
                cleaned_key = key.replace('\xa0', ' ')
            else:
                cleaned_key = key

            cleaned_dict[cleaned_key] = value

        cleaned_rows.append(cleaned_dict)

    return cleaned_rows

# Convert CSV file to JSON object.
def convert_csv_to_json(file_path):
Expand Down Expand Up @@ -59,7 +67,7 @@ def add_geocoding_to_json(data):
clean_json_data_without_geocoding = clean_csv_rows_by_removing_nbsp(json_data_without_geocoding)

json_data_with_geocoding = add_geocoding_to_json(
json_data_without_geocoding)
clean_json_data_without_geocoding)

juniper_script(
mist_username=os.environ.get('MIST_USERNAME'),
Expand Down
55 changes: 51 additions & 4 deletions test/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import tempfile
import csv
from unittest.mock import patch
from src.main import convert_csv_to_json, add_geocoding_to_json
from src.main import convert_csv_to_json, add_geocoding_to_json, clean_csv_rows_by_removing_nbsp


class TestCsvToJson(unittest.TestCase):
Expand All @@ -15,7 +15,19 @@ def setUp(self):
{'Site Name': 'Test location 2', 'Site Address': '102 Petty France, London SW1H 9AJ', 'Enable GovWifi': ' "TRUE"',
'Enable MoJWifi': ' "FALSE"', 'GovWifi Radius Key': '0D0E0DDE000BC0EEE000', 'Wired NACS Radius Key': '00000DD0000BC0EEE000'},
{'Site Name': 'Test location 3', 'Site Address': 'Met Office, FitzRoy Road, Exeter, Devon, EX1 3PB', 'Enable GovWifi': ' "TRUE"',
'Enable MoJWifi': ' "FALSE"', 'GovWifi Radius Key': '0D0E0DDE080BC0EEE000', 'Wired NACS Radius Key': '00000DD0000BC0EEE000'}
'Enable MoJWifi': ' "FALSE"', 'GovWifi Radius Key': '0D0E0DDE080BC0EEE000', 'Wired NACS Radius Key': '00000DD0000BC0EEE000'},
{'Site Name': 'non\xa0breaking\xa0space\xa0test', 'Site Address': 'Met Office, FitzRoy Road, Exeter, Devon, EX1 3PB', 'Enable GovWifi': ' "TRUE"',
'Enable MoJWifi': ' "FALSE"', 'GovWifi Radius Key': '0D0E0DDE080BC0EEE000', 'Wired NACS Radius Key': '00000DD0000BC0EEE000'}
]
self.csv_data_expected = [
{'Site Name': 'Test location 1', 'Site Address': '40 Mayflower Dr, Plymouth PL2 3DG', 'Enable GovWifi': ' "TRUE"',
'Enable MoJWifi': ' "FALSE"', 'GovWifi Radius Key': '00000DD0000BC0EEE000', 'Wired NACS Radius Key': '00000DD0000BC0EEE000'},
{'Site Name': 'Test location 2', 'Site Address': '102 Petty France, London SW1H 9AJ', 'Enable GovWifi': ' "TRUE"',
'Enable MoJWifi': ' "FALSE"', 'GovWifi Radius Key': '0D0E0DDE000BC0EEE000', 'Wired NACS Radius Key': '00000DD0000BC0EEE000'},
{'Site Name': 'Test location 3', 'Site Address': 'Met Office, FitzRoy Road, Exeter, Devon, EX1 3PB', 'Enable GovWifi': ' "TRUE"',
'Enable MoJWifi': ' "FALSE"', 'GovWifi Radius Key': '0D0E0DDE080BC0EEE000', 'Wired NACS Radius Key': '00000DD0000BC0EEE000'},
{'Site Name': 'non breaking space test', 'Site Address': 'Met Office, FitzRoy Road, Exeter, Devon, EX1 3PB', 'Enable GovWifi': ' "TRUE"',
'Enable MoJWifi': ' "FALSE"', 'GovWifi Radius Key': '0D0E0DDE080BC0EEE000', 'Wired NACS Radius Key': '00000DD0000BC0EEE000'}
]
self.csv_file = tempfile.NamedTemporaryFile(
mode='w', delete=False, newline='', suffix='.csv')
Expand All @@ -28,11 +40,11 @@ def setUp(self):
'Wired NACS Radius Key'
])
self.csv_writer.writeheader()
self.csv_writer.writerows(self.csv_data)
self.csv_writer.writerows(self.csv_data_expected)
self.csv_file.close()

def test_convert_csv_to_json_valid_csv(self):
expected_json = self.csv_data
expected_json = self.csv_data_expected
actual_json = convert_csv_to_json(self.csv_file.name)
self.assertEqual(actual_json, expected_json)

Expand Down Expand Up @@ -90,3 +102,38 @@ def test_given_site_name_and_site_address_in_json_format_when_function_called_th
find_timezone.assert_called()
mock_find_country_code.assert_called()
mock_geocode.assert_called()


class TestCleanCSVRows(unittest.TestCase):
    """Unit tests for clean_csv_rows_by_removing_nbsp."""

    def test_clean_csv_rows_no_nbsp(self):
        """Rows without non-breaking spaces compare equal to the input."""
        rows = [
            {
                'Site Name': 'Test location 1',
                'Site Address': '40 Mayflower Dr, Plymouth PL2 3DG',
                'Enable GovWifi': ' "TRUE"',
                'Enable MoJWifi': ' "FALSE"',
                'GovWifi Radius Key': '00000DD0000BC0EEE000',
                'Wired NACS Radius Key': '00000DD0000BC0EEE000',
            },
            {
                'Site Name': 'Test location 2',
                'Site Address': '102 Petty France, London SW1H 9AJ',
                'Enable GovWifi': ' "TRUE"',
                'Enable MoJWifi': ' "FALSE"',
                'GovWifi Radius Key': '0D0E0DDE000BC0EEE000',
                'Wired NACS Radius Key': '00000DD0000BC0EEE000',
            },
        ]

        cleaned = clean_csv_rows_by_removing_nbsp(rows)

        self.assertEqual(cleaned, rows, "No non-breaking spaces, should be unchanged")

    def test_clean_csv_rows_with_nbsp(self):
        """A non-breaking space in a key is replaced by a regular space."""
        # First row carries the non-breaking space in its 'Site Name' key;
        # the second row is already clean.
        row_with_nbsp_key = {
            'Site\xa0Name': 'non breaking space test',
            'Site Address': 'Met Office, FitzRoy Road, Exeter, Devon, EX1 3PB',
            'Enable GovWifi': ' "TRUE"',
            'Enable MoJWifi': ' "FALSE"',
            'GovWifi Radius Key': '0D0E0DDE080BC0EEE000',
            'Wired NACS Radius Key': '00000DD0000BC0EEE000',
        }
        row_already_clean = {
            'Site Name': 'non breaking space test',
            'Site Address': 'Met Office, FitzRoy Road, Exeter, Devon, EX1 3PB',
            'Enable GovWifi': ' "TRUE"',
            'Enable MoJWifi': ' "FALSE"',
            'GovWifi Radius Key': '0D0E0DDE080BC0EEE000',
            'Wired NACS Radius Key': '00000DD0000BC0EEE000',
        }
        rows = [row_with_nbsp_key, row_already_clean]

        # Both rows are expected to come out with identical, clean keys.
        wanted_row = {
            'Enable GovWifi': ' "TRUE"',
            'Enable MoJWifi': ' "FALSE"',
            'GovWifi Radius Key': '0D0E0DDE080BC0EEE000',
            'Site Address': 'Met Office, FitzRoy Road, Exeter, Devon, EX1 3PB',
            'Site Name': 'non breaking space test',
            'Wired NACS Radius Key': '00000DD0000BC0EEE000',
        }
        wanted = [dict(wanted_row), dict(wanted_row)]

        cleaned = clean_csv_rows_by_removing_nbsp(rows)

        self.assertEqual(cleaned, wanted, "Non-breaking spaces should be removed")

0 comments on commit c9f9b27

Please sign in to comment.