From b46fd17157e876a63a8b6040eaf1e5e3ca36b342 Mon Sep 17 00:00:00 2001 From: 5yn74x <41291962+plumped@users.noreply.github.com> Date: Mon, 29 May 2023 12:16:07 +0200 Subject: [PATCH] Added camt.053 taglist as jsonfile for tag matching --- src/static/camt053_Tags.json | 174 +++++++++++++++++++++++++++++++++++ src/xmlparser.py | 43 +++------ 2 files changed, 189 insertions(+), 28 deletions(-) create mode 100644 src/static/camt053_Tags.json diff --git a/src/static/camt053_Tags.json b/src/static/camt053_Tags.json new file mode 100644 index 0000000..8e4a47d --- /dev/null +++ b/src/static/camt053_Tags.json @@ -0,0 +1,174 @@ +{"tags": [ + {"Ccy" : ""}, + {"Acct" : ""}, + {"AcctOwnrTxId" : ""}, + {"AcctSvcrRef" : ""}, + {"AcctSvcrTxId" : ""}, + {"AddtlInf" : ""}, + {"AddtlInfInd" : ""}, + {"AddtlNtryInf" : ""}, + {"AddtlStmtInf" : ""}, + {"AddtlTxInf" : ""}, + {"Agt" : ""}, + {"Amt" : ""}, + {"AmtDtls" : ""}, + {"AnncdPstngAmt" : ""}, + {"AnyBIC" : ""}, + {"Avlbty" : ""}, + {"Bal" : ""}, + {"BICFI" : ""}, + {"BkToCstmrStmt" : ""}, + {"BkTxCd" : ""}, + {"BookgDt" : ""}, + {"Br" : ""}, + {"BrnchId" : ""}, + {"Btch" : ""}, + {"CardTx" : ""}, + {"CcyXchg" : ""}, + {"Cd" : ""}, + {"CdOrPrtry" : ""}, + {"CdtDbtInd" : ""}, + {"CdtLine" : ""}, + {"CdtNtries" : ""}, + {"Cdtr" : ""}, + {"CdtrAcct" : ""}, + {"CdtrAgt" : ""}, + {"CdtrRefInf" : ""}, + {"ChqNb" : ""}, + {"ChrgInclInd" : ""}, + {"Chrgs" : ""}, + {"ClrSysId" : ""}, + {"ClrSysMmbId" : ""}, + {"ClrSysRef" : ""}, + {"CntrValAmt" : ""}, + {"ComssnWvrInd" : ""}, + {"CorpActn" : ""}, + {"CpyDplctInd" : ""}, + {"CreDtTm" : ""}, + {"CshDpst" : ""}, + {"CtctDtls" : ""}, + {"CtrctId" : ""}, + {"CtryOfRes" : ""}, + {"DbtNtries" : ""}, + {"Dbtr" : ""}, + {"DbtrAcct" : ""}, + {"DbtrAgt" : ""}, + {"DlvrgAgt" : ""}, + {"Domn" : ""}, + {"Dt" : ""}, + {"DtTm" : ""}, + {"ElctrncSeqNb" : ""}, + {"EndToEndId" : ""}, + {"EQSeq" : ""}, + {"FcstInd" : ""}, + {"FinInstnId" : ""}, + {"Fmly" : ""}, + {"FrDtTm" : ""}, + {"FrToDt" : ""}, + {"FrToSeq" : ""}, + {"GrpHdr" : ""}, + {"IBAN" : ""}, + {"Id" : ""}, + {"InitgPty" : ""}, + {"InstdAgt" : ""}, + {"InstdAmt" : ""}, + {"InstgAgt" : ""}, + {"InstrId" : ""}, + {"IntrmyAgt1" : ""}, + {"IntrmyAgt2" : ""}, + {"IntrmyAgt3" : ""}, + {"Intrst" : ""}, + {"IssgAgt" : ""}, + {"Issr" : ""}, + {"LastPgInd" : ""}, + {"LclInstrm" : ""}, + {"LEI" : ""}, + {"LglSeqNb" : ""}, + {"LineDtls" : ""}, + {"MktInfrstrctrTxId" : ""}, + {"MmbId" : ""}, + {"MndtId" : ""}, + {"MsgId" : ""}, + {"MsgNmId" : ""}, + {"MsgPgntn" : ""}, + {"MsgRcpt" : ""}, + {"Nb" : ""}, + {"NbOfNtries" : ""}, + {"NbOfTxs" : ""}, + {"Nm" : ""}, + {"Ntry" : ""}, + {"NtryDtls" : ""}, + {"NtryRef" : ""}, + {"OrgId" : ""}, + {"OrgnlBizQry" : ""}, + {"OrgnlBkTxCd" : ""}, + {"Orgtr" : ""}, + {"Othr" : ""}, + {"Ownr" : ""}, + {"PgNb" : ""}, + {"PmtInfId" : ""}, + {"PrcgId" : ""}, + {"Prtry" : ""}, + {"PrtryAmt" : ""}, + {"PrvtId" : ""}, + {"Prxy" : ""}, + {"PstlAdr" : ""}, + {"Pty" : ""}, + {"Purp" : ""}, + {"QtnDt" : ""}, + {"Rate" : ""}, + {"Rcrd" : ""}, + {"RcvgAgt" : ""}, + {"Refs" : ""}, + {"RfrdDocAmt" : ""}, + {"RfrdDocInf" : ""}, + {"RltdAcct" : ""}, + {"RltdAgts" : ""}, + {"RltdDt" : ""}, + {"RltdPties" : ""}, + {"RltdRmtInf" : ""}, + {"RmtInf" : ""}, + {"RptgSeq" : ""}, + {"RptgSrc" : ""}, + {"Rsn" : ""}, + {"RvslInd" : ""}, + {"SchmeNm" : ""}, + {"SfkpgAcct" : ""}, + {"SplmtryData" : ""}, + {"SrcCcy" : ""}, + {"Stmt" : ""}, + {"StmtPgntn" : ""}, + {"Strd" : ""}, + {"Sts" : ""}, + {"SttlmPlc" : ""}, + {"SubFmlyCd" : ""}, + {"SubTp" : ""}, + {"Sum" : ""}, + {"Svcr" : ""}, + {"tags" : ""}, + {"Tax" : ""}, + {"TechInptChanl" : ""}, + {"ToDtTm" : ""}, + {"ToSeq" : ""}, + {"Tp" : ""}, + {"TradgPty" : ""}, + {"TrgtCcy" : ""}, + {"TtlAmt" : ""}, + {"TtlCdtNtries" : ""}, + {"TtlChrgsAndTaxA" : ""}, + {"TtlDbtNtries" : ""}, + {"TtlNetNtry" : ""}, + {"TtlNtries" : ""}, + {"TtlNtriesPerBkTxC" : ""}, + {"TxAmt" : ""}, + {"TxDtls" : ""}, + {"TxId" : ""}, + {"TxsSummry" : ""}, + {"UETR" : ""}, + {"UltmtCdtr" : ""}, + {"UltmtDbtr" : ""}, + {"UnitCcy" : ""}, + {"Ustrd" : ""}, + {"ValDt" : ""}, + {"XchgRate" : ""} +]} \ No newline at end of file diff --git a/src/xmlparser.py b/src/xmlparser.py index fd8ed86..62c568b 100644 --- a/src/xmlparser.py +++ b/src/xmlparser.py @@ -5,9 +5,19 @@ import os import glob import shutil +import json -# A dictionary to store all the tags found in the input files -alltags = {} +# Load camt tags +with open('static/camt053_Tags.json', 'r') as file: + data = json.load(file) + +allTags = {} + +for item in data['tags']: + for key, value in item.items(): + allTags[key] = value + +allTags = dict(sorted(allTags.items())) allFiles = [] @@ -62,33 +72,10 @@ def make_archive(): os.makedirs('./downloads') -def get_all_tags(): - # set the path of the upload directory - path = './uploads/' - # set an empty set to store the tags that are already seen - seen_tags = set() - # loop over each .zip file in the upload directory - for filename in glob.glob(os.path.join(path, '*.zip')): - # read the content of the .zip file - with zipfile.ZipFile(os.path.join(os.getcwd(), filename), 'r') as zf: - # loop over each file in the .zip archive - for name in zf.namelist(): - # parse the XML file with BeautifulSoup - soup = BeautifulSoup(zf.open(name), 'xml') - # find all the tags in the XML file - tags = [tag.name for tag in soup.find_all()] - # add new tags to the dictionary of all tags - for tag in sorted(tags): - if tag not in seen_tags: - alltags[tag] = '' - seen_tags.add(tag) - - def parse_xml_files(): # set path and get all tags clear_iban_list() path = './uploads/' - get_all_tags() # iterate through zip files in uploads directory for filename in glob.glob(os.path.join(path, '*.zip')): with zipfile.ZipFile(os.path.join(os.getcwd(), filename), 'r') as zf: @@ -102,7 +89,7 @@ def parse_xml_files(): for ntry in b_ntry: tag_entries = {} # iterate through all tags and find matching entries in xml file - for tag in alltags: + for tag in allTags: a = ntry.find(tag) if tag == 'Cdtr': tag_entries[tag.title()] = a.find('Nm').text if a and a.find('Nm') else '' @@ -112,5 +99,5 @@ def parse_xml_files(): tag_entries[tag.title()] = a.text if a else '' ibanList[x].append(tag_entries) zf.close() - create_csv(alltags) - make_archive() \ No newline at end of file + create_csv(allTags) + make_archive()