From b46fd17157e876a63a8b6040eaf1e5e3ca36b342 Mon Sep 17 00:00:00 2001
From: 5yn74x <41291962+plumped@users.noreply.github.com>
Date: Mon, 29 May 2023 12:16:07 +0200
Subject: [PATCH] Added camt.053 taglist as jsonfile for tag matching

---
 src/static/camt053_Tags.json | 174 +++++++++++++++++++++++++++++++++++
 src/xmlparser.py             |  43 +++------
 2 files changed, 189 insertions(+), 28 deletions(-)
 create mode 100644 src/static/camt053_Tags.json

diff --git a/src/static/camt053_Tags.json b/src/static/camt053_Tags.json
new file mode 100644
index 0000000..8e4a47d
--- /dev/null
+++ b/src/static/camt053_Tags.json
@@ -0,0 +1,174 @@
+{"tags": [
+	{"Ccy" : ""},
+	{"Acct" : ""},
+	{"AcctOwnrTxId" : ""},
+	{"AcctSvcrRef" : ""},
+	{"AcctSvcrTxId" : ""},
+	{"AddtlInf" : ""},
+	{"AddtlInfInd" : ""},
+	{"AddtlNtryInf" : ""},
+	{"AddtlStmtInf" : ""},
+	{"AddtlTxInf" : ""},
+	{"Agt" : ""},
+	{"Amt" : ""},
+	{"AmtDtls" : ""},
+	{"AnncdPstngAmt" : ""},
+	{"AnyBIC" : ""},
+	{"Avlbty" : ""},
+	{"Bal" : ""},
+	{"BICFI" : ""},
+	{"BkToCstmrStmt" : ""},
+	{"BkTxCd" : ""},
+	{"BookgDt" : ""},
+	{"Br" : ""},
+	{"BrnchId" : ""},
+	{"Btch" : ""},
+	{"CardTx" : ""},
+	{"CcyXchg" : ""},
+	{"Cd" : ""},
+	{"CdOrPrtry" : ""},
+	{"CdtDbtInd" : ""},
+	{"CdtLine" : ""},
+	{"CdtNtries" : ""},
+	{"Cdtr" : ""},
+	{"CdtrAcct" : ""},
+	{"CdtrAgt" : ""},
+	{"CdtrRefInf" : ""},
+	{"ChqNb" : ""},
+	{"ChrgInclInd" : ""},
+	{"Chrgs" : ""},
+	{"ClrSysId" : ""},
+	{"ClrSysMmbId" : ""},
+	{"ClrSysRef" : ""},
+	{"CntrValAmt" : ""},
+	{"ComssnWvrInd" : ""},
+	{"CorpActn" : ""},
+	{"CpyDplctInd" : ""},
+	{"CreDtTm" : ""},
+	{"CshDpst" : ""},
+	{"CtctDtls" : ""},
+	{"CtrctId" : ""},
+	{"CtryOfRes" : ""},
+	{"DbtNtries" : ""},
+	{"Dbtr" : ""},
+	{"DbtrAcct" : ""},
+	{"DbtrAgt" : ""},
+	{"DlvrgAgt" : ""},
+	{"Domn" : ""},
+	{"Dt" : ""},
+	{"DtTm" : ""},
+	{"ElctrncSeqNb" : ""},
+	{"EndToEndId" : ""},
+	{"EQSeq" : ""},
+	{"FcstInd" : ""},
+	{"FinInstnId" : ""},
+	{"Fmly" : ""},
+	{"FrDtTm" : ""},
+	{"FrToDt" : ""},
+	{"FrToSeq" : ""},
+	{"GrpHdr" : ""},
+	{"IBAN" : ""},
+	{"Id" : ""},
+	{"InitgPty" : ""},
+	{"InstdAgt" : ""},
+	{"InstdAmt" : ""},
+	{"InstgAgt" : ""},
+	{"InstrId" : ""},
+	{"IntrmyAgt1" : ""},
+	{"IntrmyAgt2" : ""},
+	{"IntrmyAgt3" : ""},
+	{"Intrst" : ""},
+	{"IssgAgt" : ""},
+	{"Issr" : ""},
+	{"LastPgInd" : ""},
+	{"LclInstrm" : ""},
+	{"LEI" : ""},
+	{"LglSeqNb" : ""},
+	{"LineDtls" : ""},
+	{"MktInfrstrctrTxId" : ""},
+	{"MmbId" : ""},
+	{"MndtId" : ""},
+	{"MsgId" : ""},
+	{"MsgNmId" : ""},
+	{"MsgPgntn" : ""},
+	{"MsgRcpt" : ""},
+	{"Nb" : ""},
+	{"NbOfNtries" : ""},
+	{"NbOfTxs" : ""},
+	{"Nm" : ""},
+	{"Ntry" : ""},
+	{"NtryDtls" : ""},
+	{"NtryRef" : ""},
+	{"OrgId" : ""},
+	{"OrgnlBizQry" : ""},
+	{"OrgnlBkTxCd" : ""},
+	{"Orgtr" : ""},
+	{"Othr" : ""},
+	{"Ownr" : ""},
+	{"PgNb" : ""},
+	{"PmtInfId" : ""},
+	{"PrcgId" : ""},
+	{"Prtry" : ""},
+	{"PrtryAmt" : ""},
+	{"PrvtId" : ""},
+	{"Prxy" : ""},
+	{"PstlAdr" : ""},
+	{"Pty" : ""},
+	{"Purp" : ""},
+	{"QtnDt" : ""},
+	{"Rate" : ""},
+	{"Rcrd" : ""},
+	{"RcvgAgt" : ""},
+	{"Refs" : ""},
+	{"RfrdDocAmt" : ""},
+	{"RfrdDocInf" : ""},
+	{"RltdAcct" : ""},
+	{"RltdAgts" : ""},
+	{"RltdDt" : ""},
+	{"RltdPties" : ""},
+	{"RltdRmtInf" : ""},
+	{"RmtInf" : ""},
+	{"RptgSeq" : ""},
+	{"RptgSrc" : ""},
+	{"Rsn" : ""},
+	{"RvslInd" : ""},
+	{"SchmeNm" : ""},
+	{"SfkpgAcct" : ""},
+	{"SplmtryData" : ""},
+	{"SrcCcy" : ""},
+	{"Stmt" : ""},
+	{"StmtPgntn" : ""},
+	{"Strd" : ""},
+	{"Sts" : ""},
+	{"SttlmPlc" : ""},
+	{"SubFmlyCd" : ""},
+	{"SubTp" : ""},
+	{"Sum" : ""},
+	{"Svcr" : ""},
+	{"tags" : ""},
+	{"Tax" : ""},
+	{"TechInptChanl" : ""},
+	{"ToDtTm" : ""},
+	{"ToSeq" : ""},
+	{"Tp" : ""},
+	{"TradgPty" : ""},
+	{"TrgtCcy" : ""},
+	{"TtlAmt" : ""},
+	{"TtlCdtNtries" : ""},
+	{"TtlChrgsAndTaxAmt" : ""},
+	{"TtlDbtNtries" : ""},
+	{"TtlNetNtry" : ""},
+	{"TtlNtries" : ""},
+	{"TtlNtriesPerBkTxCd" : ""},
+	{"TxAmt" : ""},
+	{"TxDtls" : ""},
+	{"TxId" : ""},
+	{"TxsSummry" : ""},
+	{"UETR" : ""},
+	{"UltmtCdtr" : ""},
+	{"UltmtDbtr" : ""},
+	{"UnitCcy" : ""},
+	{"Ustrd" : ""},
+	{"ValDt" : ""},
+	{"XchgRate" : ""}
+]}
\ No newline at end of file
diff --git a/src/xmlparser.py b/src/xmlparser.py
index fd8ed86..62c568b 100644
--- a/src/xmlparser.py
+++ b/src/xmlparser.py
@@ -5,9 +5,19 @@
 import os
 import glob
 import shutil
+import json
 
-# A dictionary to store all the tags found in the input files
-alltags = {}
+# Load the camt.053 tag list. The path is anchored to this module's directory
+# so the import succeeds regardless of the process's working directory.
+_tags_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                          'static', 'camt053_Tags.json')
+with open(_tags_path, 'r', encoding='utf-8') as file:
+    data = json.load(file)
+
+# Each list item is a one-key object; merge them into a single dict (later
+# duplicates win), then rebuild it sorted so iteration follows tag-name order.
+allTags = {key: value for item in data['tags'] for key, value in item.items()}
+allTags = dict(sorted(allTags.items()))
 
 allFiles = []
 
@@ -62,33 +72,10 @@ def make_archive():
     os.makedirs('./downloads')
 
 
-def get_all_tags():
-    # set the path of the upload directory
-    path = './uploads/'
-    # set an empty set to store the tags that are already seen
-    seen_tags = set()
-    # loop over each .zip file in the upload directory
-    for filename in glob.glob(os.path.join(path, '*.zip')):
-        # read the content of the .zip file
-        with zipfile.ZipFile(os.path.join(os.getcwd(), filename), 'r') as zf:
-            # loop over each file in the .zip archive
-            for name in zf.namelist():
-                # parse the XML file with BeautifulSoup
-                soup = BeautifulSoup(zf.open(name), 'xml')
-                # find all the tags in the XML file
-                tags = [tag.name for tag in soup.find_all()]
-                # add new tags to the dictionary of all tags
-                for tag in sorted(tags):
-                    if tag not in seen_tags:
-                        alltags[tag] = ''
-                        seen_tags.add(tag)
-
-
 def parse_xml_files():
     # set path and get all tags
     clear_iban_list()
     path = './uploads/'
-    get_all_tags()
     # iterate through zip files in uploads directory
     for filename in glob.glob(os.path.join(path, '*.zip')):
         with zipfile.ZipFile(os.path.join(os.getcwd(), filename), 'r') as zf:
@@ -102,7 +89,7 @@ def parse_xml_files():
                 for ntry in b_ntry:
                     tag_entries = {}
                     # iterate through all tags and find matching entries in xml file
-                    for tag in alltags:
+                    for tag in allTags:
                         a = ntry.find(tag)
                         if tag == 'Cdtr':
                             tag_entries[tag.title()] = a.find('Nm').text if a and a.find('Nm') else ''
@@ -112,5 +99,5 @@ def parse_xml_files():
                             tag_entries[tag.title()] = a.text if a else ''
                     ibanList[x].append(tag_entries)
             zf.close()
-        create_csv(alltags)
-    make_archive()
\ No newline at end of file
+        create_csv(allTags)
+    make_archive()