From c73611369cf66a52d234619b742c44a2bab8818e Mon Sep 17 00:00:00 2001 From: Arun Siluvery Date: Tue, 26 Mar 2024 15:42:28 +0000 Subject: [PATCH] Skip empty or invalid lines when anonymising edi_data --- ...CHIEF_LIVE_SPIRE_licenceData_78859_invalid | 5 +++++ ...CHIEF_LIVE_SPIRE_licenceData_78859_unitNAR | 2 +- .../tests/test_anonymised_dumps.py | 21 ++++++++++++++++++- mail/anonymisers.py | 5 ++++- 4 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 anonymised_db_dumps/tests/CHIEF_LIVE_SPIRE_licenceData_78859_invalid diff --git a/anonymised_db_dumps/tests/CHIEF_LIVE_SPIRE_licenceData_78859_invalid b/anonymised_db_dumps/tests/CHIEF_LIVE_SPIRE_licenceData_78859_invalid new file mode 100644 index 00000000..4b6a0172 --- /dev/null +++ b/anonymised_db_dumps/tests/CHIEF_LIVE_SPIRE_licenceData_78859_invalid @@ -0,0 +1,5 @@ +1\fileHeader\SPIRE\CHIEF\licenceData\202008101531\71859\N +valid header and footer but invalid licence lines + + +5\fileTrailer\0 diff --git a/anonymised_db_dumps/tests/CHIEF_LIVE_SPIRE_licenceData_78859_unitNAR b/anonymised_db_dumps/tests/CHIEF_LIVE_SPIRE_licenceData_78859_unitNAR index 31438073..5cdccf40 100644 --- a/anonymised_db_dumps/tests/CHIEF_LIVE_SPIRE_licenceData_78859_unitNAR +++ b/anonymised_db_dumps/tests/CHIEF_LIVE_SPIRE_licenceData_78859_unitNAR @@ -13,4 +13,4 @@ 13\restrictions\Provisos may apply please see licence 14\line\1\\\\\Techn PRODUCT\Q\\030\\1 15\end\licence\7 -16\fileTrailer\2 \ No newline at end of file +16\fileTrailer\2 diff --git a/anonymised_db_dumps/tests/test_anonymised_dumps.py b/anonymised_db_dumps/tests/test_anonymised_dumps.py index f82ef8ff..71bd2bbe 100644 --- a/anonymised_db_dumps/tests/test_anonymised_dumps.py +++ b/anonymised_db_dumps/tests/test_anonymised_dumps.py @@ -94,6 +94,7 @@ def create_test_data(cls): data={"reference": "GBSIEL/2024/0000001/P", "action": "insert"}, ) cls.mail_invalid = MailFactory(edi_data="invalid edi data") + cls.mail_invalid_lines = cls.load_edi_data_from_file("CHIEF_LIVE_SPIRE_licenceData_78859_invalid") def get_licences_in_message(self, edi_data): message_lines = edi_data.split("\n") @@ -103,7 +104,10 @@ def get_licences_in_message(self, edi_data): start = 0 licences = [] for index in range(len(message_lines)): - line_type = message_lines[index].split("\\")[1] + tokens = message_lines[index].split("\\") + if len(tokens) < 2: + continue + line_type = tokens[1] if line_type == "licence": start = index if line_type == "end": @@ -118,6 +122,7 @@ def delete_test_data(cls): cls.siel_mail_nar.delete() cls.siel_mail_kgm.delete() cls.mail_invalid.delete() + cls.mail_invalid_lines.delete() cls.open_licences_mail.delete() @parameterized.expand( @@ -250,6 +255,20 @@ def test_mail_with_invalid_edi_data_anonymised(self): assert anonymised_mail.sent_data == f"{today}: sent_data contents anonymised" assert anonymised_mail.edi_data == f"{today}: invalid edi data" + def test_mail_with_valid_header_footer_invalid_lines_anonymised(self): + anonymised_mail = Mail.objects.get(id=self.mail_invalid_lines.id) + assert anonymised_mail.edi_filename == self.mail_invalid_lines.edi_filename + today = datetime.strftime(datetime.today().date(), "%d %B %Y") + assert anonymised_mail.raw_data == f"{today}: raw_data contents anonymised" + assert anonymised_mail.sent_data == f"{today}: sent_data contents anonymised" + licences = self.get_licences_in_message(anonymised_mail.edi_data) + assert len(licences) == 0 + + assert ( + anonymised_mail.edi_data + == "1\\fileHeader\\SPIRE\\CHIEF\\licenceData\\202008101531\\71859\\N\n5\\fileTrailer\\0" + ) + def test_licence_payload_anonymised(self): anonymised_licence_payload = LicencePayload.objects.get(id=self.licence_payload.id) assert anonymised_licence_payload.lite_id == self.licence_payload.lite_id diff --git a/mail/anonymisers.py b/mail/anonymisers.py index fac70c3a..37d446bd 100644 --- a/mail/anonymisers.py +++ b/mail/anonymisers.py @@ -63,12 +63,15 @@ def sanitize_product_line(line): def sanitize_edi_data(lines): - if "fileHeader" not in lines: + if "fileHeader" not in lines and "fileTrailer" not in lines: return f"{today()}: invalid edi data" output_lines = [] for line in lines.split("\n"): tokens = line.split("\\") + # skip empty or invalid lines + if len(tokens) < 2: + continue line_type = tokens[1] output_line = edi_data_sanitizer.get(line_type, lambda x: x)(line)