From 5ee976aa6cc006df4f03dd87da910a476f9224d9 Mon Sep 17 00:00:00 2001 From: Stephen James Date: Fri, 29 Dec 2023 09:35:04 +0000 Subject: [PATCH] Added method to clean input csv of NBSP unicode chars The CSV file gets copied and pasted, into and out of Slack. While we would not recommend this, it happened. This addition strips those characters from the input csv and writes a clean file. --- src/main.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index 8569bb2..2238cd4 100644 --- a/src/main.py +++ b/src/main.py @@ -5,6 +5,13 @@ import csv from geocode import geocode, find_timezone, find_country_code +# Strip non-breaking-space invisible characters +def replace_non_breaking_spaces(unsanitised, clean): + with open(unsanitised, "r") as input: + with open(clean, "w") as output: + for line in input: + line = line.replace("\xa0", " ") + output.write(line) # Convert CSV file to JSON object. def convert_csv_to_json(file_path): @@ -43,7 +50,11 @@ def add_geocoding_to_json(data): if __name__ == '__main__': - csv_file_path = os.getcwd() + '/../data_src/sites_with_clients.csv' + unsanitised_csv_file_path = os.getcwd() + '/../data_src/sites_with_clients.csv' + csv_file_path = os.getcwd() + '/../data_src/sites_with_clients.clean.csv' + + # Strip non-breaking-space invisible characters + replace_non_breaking_spaces(unsanitised_csv_file_path, csv_file_path) # Convert CSV to valid JSON json_data_without_geocoding = convert_csv_to_json(csv_file_path)