Skip to content

Commit

Permalink
Merge pull request #242 from uktrade/LTD-4776-fix-edi-data-anonymisation
Browse files (browse the repository at this point in the history)
LTD-4776: Skip empty or invalid lines when anonymising edi_data
  • Loading branch information
saruniitr authored Mar 28, 2024
2 parents bc7a22e + c736113 commit 453f3e8
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1\fileHeader\SPIRE\CHIEF\licenceData\202008101531\71859\N
valid header and footer but invalid licence lines


5\fileTrailer\0
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
13\restrictions\Provisos may apply please see licence
14\line\1\\\\\Techn PRODUCT\Q\\030\\1
15\end\licence\7
16\fileTrailer\2
16\fileTrailer\2
21 changes: 20 additions & 1 deletion anonymised_db_dumps/tests/test_anonymised_dumps.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def create_test_data(cls):
data={"reference": "GBSIEL/2024/0000001/P", "action": "insert"},
)
cls.mail_invalid = MailFactory(edi_data="invalid edi data")
cls.mail_invalid_lines = cls.load_edi_data_from_file("CHIEF_LIVE_SPIRE_licenceData_78859_invalid")

def get_licences_in_message(self, edi_data):
message_lines = edi_data.split("\n")
Expand All @@ -103,7 +104,10 @@ def get_licences_in_message(self, edi_data):
start = 0
licences = []
for index in range(len(message_lines)):
line_type = message_lines[index].split("\\")[1]
tokens = message_lines[index].split("\\")
if len(tokens) < 2:
continue
line_type = tokens[1]
if line_type == "licence":
start = index
if line_type == "end":
Expand All @@ -118,6 +122,7 @@ def delete_test_data(cls):
cls.siel_mail_nar.delete()
cls.siel_mail_kgm.delete()
cls.mail_invalid.delete()
cls.mail_invalid_lines.delete()
cls.open_licences_mail.delete()

@parameterized.expand(
Expand Down Expand Up @@ -250,6 +255,20 @@ def test_mail_with_invalid_edi_data_anonymised(self):
assert anonymised_mail.sent_data == f"{today}: sent_data contents anonymised"
assert anonymised_mail.edi_data == f"{today}: invalid edi data"

def test_mail_with_valid_header_footer_invalid_lines_anonymised(self):
    """Check anonymisation of a mail whose edi_data has a header and trailer but no valid licence lines.

    The anonymised edi_data is expected to contain only the header and
    trailer lines, and no licences should be found in it.
    """
    source_mail = self.mail_invalid_lines
    anonymised_mail = Mail.objects.get(id=source_mail.id)

    # The filename is expected to be left untouched by anonymisation.
    assert anonymised_mail.edi_filename == source_mail.edi_filename

    # Anonymised free-text fields are prefixed with the current date.
    date_prefix = datetime.today().date().strftime("%d %B %Y")
    assert anonymised_mail.raw_data == f"{date_prefix}: raw_data contents anonymised"
    assert anonymised_mail.sent_data == f"{date_prefix}: sent_data contents anonymised"

    found_licences = self.get_licences_in_message(anonymised_mail.edi_data)
    assert len(found_licences) == 0

    # Only the file header and file trailer lines should remain.
    expected_edi_data = "1\\fileHeader\\SPIRE\\CHIEF\\licenceData\\202008101531\\71859\\N\n5\\fileTrailer\\0"
    assert anonymised_mail.edi_data == expected_edi_data

def test_licence_payload_anonymised(self):
anonymised_licence_payload = LicencePayload.objects.get(id=self.licence_payload.id)
assert anonymised_licence_payload.lite_id == self.licence_payload.lite_id
Expand Down
5 changes: 4 additions & 1 deletion mail/anonymisers.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,15 @@ def sanitize_product_line(line):

def sanitize_edi_data(lines):

if "fileHeader" not in lines:
if "fileHeader" not in lines and "fileTrailer" not in lines:
return f"{today()}: invalid edi data"

output_lines = []
for line in lines.split("\n"):
tokens = line.split("\\")
# skip empty or invalid lines
if len(tokens) < 2:
continue
line_type = tokens[1]
output_line = edi_data_sanitizer.get(line_type, lambda x: x)(line)

Expand Down

0 comments on commit 453f3e8

Please sign in to comment.