Skip to content

Commit

Permalink
Merge pull request #9 from PyCampES/feat/caixabank-parser
Browse files Browse the repository at this point in the history
Add CaixaBank parser, closes #8
  • Loading branch information
toniGrabulosa authored Mar 31, 2024
2 parents 30d00fb + d3c33bc commit 23d586c
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 113 deletions.
44 changes: 26 additions & 18 deletions src/ficamp/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
from ficamp.classifier.preprocessing import preprocess
from ficamp.datastructures import Tx
from ficamp.parsers.abn import AbnParser
from ficamp.parsers.bbva import AccountBBVAParser, CreditCardBBVAParser
from ficamp.parsers.bsabadell import AccountBSabadellParser, CreditCardBSabadellParser
from ficamp.parsers.caixabank import CaixaBankParser
from ficamp.parsers.enums import BankParser


def cli() -> argparse.Namespace:
Expand All @@ -28,7 +32,10 @@ def cli() -> argparse.Namespace:
# Subparser for the import command
import_parser = subparsers.add_parser("import", help="Import a Transactions")
import_parser.add_argument(
"--bank", choices=["abn"], default="abn", help="Specify the bank for the import"
"--bank",
choices=[e.value for e in BankParser],
default="abn",
help="Specify the bank for the import",
)
import_parser.add_argument("filename", help="File to load")
import_parser.set_defaults(func=import_data)
Expand All @@ -48,23 +55,24 @@ def cli() -> argparse.Namespace:
def import_data(args, engine):
"""Run the parsers."""
print(f"Importing data from {args.filename} for bank {args.bank}.")
# TODO: Build enum for banks
if args.bank == "abn":
parser = AbnParser()
parser.load(args.filename)
transactions = parser.parse()
for tx in transactions:
with Session(engine) as session:
# Assuming 'date' and 'amount' can uniquely identify a transaction
statement = select(Tx).where(
Tx.date == tx.date, Tx.amount == tx.amount, Tx.concept == tx.concept
)
result = session.exec(statement).first()
if result is None: # No existing transaction found
session.add(tx)
session.commit()
else:
print(f"Transaction already exists in the database. {tx}")
parser = BankParser(args.bank).get_parser()
parser.load(args.filename)
save_transactions_to_db(parser.parse(), engine)


def save_transactions_to_db(transactions, engine):
    """Store the transactions that are not already present in the database.

    A transaction counts as already stored when a ``Tx`` row with the same
    date, amount and concept exists. Such transactions are reported on
    stdout and skipped; all others are added and committed.

    Args:
        transactions: Iterable of ``Tx`` objects to store.
        engine: Database engine handed to ``Session``.
    """
    # One session serves the whole batch instead of opening and closing a
    # new one for every single transaction.
    with Session(engine) as session:
        for tx in transactions:
            # Assuming date + amount + concept uniquely identify a transaction.
            statement = select(Tx).where(
                Tx.date == tx.date, Tx.amount == tx.amount, Tx.concept == tx.concept
            )
            if session.exec(statement).first() is None:  # No existing transaction found
                session.add(tx)
                # Commit per insert so rows stored before a later failure
                # stay stored.
                session.commit()
            else:
                print(f"Transaction already exists in the database. {tx}")


def get_category_dict(categories_database_path="categories_database.json"):
Expand Down
2 changes: 1 addition & 1 deletion src/ficamp/parsers/abn.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,6 @@ def build_transaction(
currency=_currency,
concept=_concept,
category=None,
metadata={},
tx_metadata={},
tags=[],
)
59 changes: 13 additions & 46 deletions src/ficamp/parsers/bbva.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,40 +4,30 @@

from openpyxl import load_workbook

from ficamp.datastructures import Tx
from ficamp.datastructures import Currency, Tx
from ficamp.parsers.protocols import Parser


class AccountBBVAParser(Parser):
"""Parser for BBVA bank account extract"""

def load(self, filename: Path | None = None):
# TODO: rearrange this.

# filename = Path("../data/enero-febrero-bbva-cuenta.xlsx")
# filename = os.path.join(
# os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
# "data/enero-febrero-bbva-cuenta.xlsx",
# )

def load(self, filename: Path):
wb = load_workbook(filename)
sheet = wb.active
start_row = 6
start_column = 2

return [
self.rows = [
row
for row in sheet.iter_rows(
min_row=start_row, min_col=start_column, values_only=True
)
]

def parse(self) -> list[Tx]:
rows = self.load()

return [
self.row_processor(row)
for row in rows
for row in self.rows
if self.row_processor(row) is not None
]

Expand All @@ -46,77 +36,54 @@ def row_processor(self, row):
if "targeta" in row[2] or "tarjeta" in row[2]:
return None

if row[5] == "EUR":
currency = "€"

concept = f"{row[2]} || {row[3]}"

return Tx(
date=row[0],
amount=Decimal(str(row[4])),
currency=currency,
currency=Currency(row[5]),
concept=concept,
category=None,
metadata={"more_details": row[8], "origin": "BBVA Account"},
tx_metadata={"more_details": row[8], "origin": "BBVA Account"},
tags=[],
)


class CreditCardBBVAParser(Parser):
"""Parser for BBVA Credit Card Extract"""

def load(self, filename: Path | None = None):
# TODO: rearrange this
# filename = Path("../data/enero-febrero-bbva-cuenta.xlsx")
# filename = os.path.join(
# os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
# "data/enero-febrero-bbva-targeta.xlsx",
# )

def load(self, filename: Path):
wb = load_workbook(filename)
sheet = wb.active
start_row = 6
start_column = 2

return [
self.rows = [
row
for row in sheet.iter_rows(
min_row=start_row, min_col=start_column, values_only=True
)
]

def parse(self) -> list[Tx]:
rows = self.load()

return [
self.row_processor(row)
for row in rows
for row in self.rows
if self.row_processor(row) is not None
]

def row_processor(self, row):
# Skip Recharging the Credit Card, as it's an useless operation from user's POV.
# Skip a positive record, as it's an internal operation for the bank
# to "recharge" the credit card when it gets discounted from associated account
if row[3] > 0:
return None

currency = "€"

return Tx(
date=datetime.strptime(row[0], "%d/%m/%Y"),
amount=Decimal(str(row[3])),
currency=currency,
currency=Currency("EUR"),
concept=row[2],
category=None,
metadata={"origin": "BBVA Credit Card"},
tx_metadata={"origin": "BBVA Credit Card"},
tags=[],
)


if __name__ == "__main__":
bbva = AccountBBVAParser()
txs = bbva.parse()

txs += CreditCardBBVAParser().parse()

for tx in txs:
print(tx)
Original file line number Diff line number Diff line change
@@ -1,44 +1,33 @@
import os
from datetime import datetime
from decimal import Decimal
from pathlib import Path

from openpyxl import load_workbook

from ficamp.datastructures import Tx
from ficamp.datastructures import Currency, Tx
from ficamp.parsers.protocols import Parser


class AccountBSabadellParser(Parser):
"""Parser for BBVA bank account extract"""

def load(self, filename: Path | None = None):
# TODO: rearrange this.

# filename = Path("../data/enero-febrero-bbva-cuenta.xlsx")
# filename = os.path.join(
# os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
# "data/enero-febrero-bsabadell-cuenta.xlsx",
# )
"""Parser for BSabadell bank account extract"""

def load(self, filename: Path):
wb = load_workbook(filename)
sheet = wb.active
start_row = 10
start_column = 1

return [
self.rows = [
row
for row in sheet.iter_rows(
min_row=start_row, min_col=start_column, values_only=True
)
]

def parse(self) -> list[Tx]:
rows = self.load()

return [
self.row_processor(row)
for row in rows
for row in self.rows
if self.row_processor(row) is not None
]

Expand All @@ -47,17 +36,15 @@ def row_processor(self, row):
if "TARJETA CREDITO" in row[2]:
return None

currency = "€"

concept = self.concept_builder(row)

return Tx(
date=datetime.strptime(row[0], "%d/%m/%Y"),
amount=Decimal(str(row[3])),
currency=currency,
currency=Currency("EUR"),
concept=concept,
category=None,
metadata={"origin": "BSABADELL Account"},
tx_metadata={"origin": "BSABADELL Account"},
tags=[],
)

Expand All @@ -76,16 +63,9 @@ def concept_builder(self, row):


class CreditCardBSabadellParser(Parser):
"""Parser for Banc Sabadell Credit Card Extract"""

def load(self, filename: Path | None = None):
# TODO: rearrange this
# filename = Path("../data/enero-febrero-bbva-cuenta.xlsx")
# filename = os.path.join(
# os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
# "data/enero-febrero-bsabadell-targeta.xlsx",
# )
"""Parser for BSabadell Credit Card Extract"""

def load(self, filename: Path):
wb = load_workbook(filename)
sheet = wb.active
start_row = 12
Expand All @@ -96,39 +76,28 @@ def load(self, filename: Path | None = None):
min_row=start_row, min_col=start_column, values_only=True
):
if row[0] is None:
# Mixed content for xlsx file, so we need to break when we reach the end of the table
# Mixed content for xlsx file, so we need to break
# when we reach the end of the main table.
break

rows.append(row)
return rows

self.rows = rows

def parse(self) -> list[Tx]:
rows = self.load()
return [
self.row_processor(row)
for row in rows
for row in self.rows
if self.row_processor(row) is not None
]

def row_processor(self, row):
currency = "€"

return Tx(
date=datetime.strptime(f"{row[0]}/{datetime.now().year}", "%d/%m/%Y"),
amount=Decimal(str(row[4]).replace(",", ".")),
currency=currency,
amount=Decimal(f'-{str(row[4]).replace(",", ".")}'),
currency=Currency("EUR"),
concept=row[1],
category=None,
metadata={"origin": "BSABADELL Credit Card", "location": row[2]},
tx_metadata={"origin": "BSABADELL Credit Card", "tx_location": row[2]},
tags=[],
)


if __name__ == "__main__":
bsabadell = AccountBSabadellParser()
txs = bsabadell.parse()

txs += CreditCardBSabadellParser().parse()

for tx in txs:
print(tx.concept)
45 changes: 45 additions & 0 deletions src/ficamp/parsers/caixabank.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from datetime import datetime
from decimal import Decimal
from pathlib import Path

from openpyxl import load_workbook

from ficamp.datastructures import Currency, Tx
from ficamp.parsers.protocols import Parser


class CaixaBankParser(Parser):
    """Parser for CaixaBank bank account extracts stored as xlsx workbooks."""

    def load(self, filename: Path) -> None:
        """Read the rows of the workbook's active sheet into ``self.rows``.

        Args:
            filename: Path of the xlsx file to read.
        """
        wb = load_workbook(filename)
        sheet = wb.active
        # Reading starts at the 4th row and 1st column; the rows above are
        # presumably a header block -- TODO confirm against a real export.
        start_row = 4
        start_column = 1

        # values_only=True yields plain cell values instead of Cell objects.
        self.rows = list(
            sheet.iter_rows(
                min_row=start_row, min_col=start_column, values_only=True
            )
        )

    def parse(self) -> list[Tx]:
        """Convert the rows stored by ``load`` into transactions.

        ``load`` must be called first, because it is what sets ``self.rows``.
        Rows for which ``row_processor`` returns ``None`` are left out.

        Returns:
            One ``Tx`` per kept row, in sheet order.
        """
        # Bind the result once so each row is processed (and each Tx built)
        # a single time rather than once for the filter and once for the value.
        return [
            tx
            for row in self.rows
            if (tx := self.row_processor(row)) is not None
        ]

    def row_processor(self, row) -> Tx:
        """Build a ``Tx`` from a single sheet row.

        Column 0 is used as the date, columns 2 and 3 are joined into the
        concept and column 4 is the amount. The currency is always EUR.

        Args:
            row: Sequence of cell values for one row, as stored by ``load``.
        """
        concept = f"{row[2]} || {row[3]}"

        return Tx(
            date=row[0],
            # Going through str() keeps binary floating-point noise out of
            # the Decimal if the cell value is a float.
            amount=Decimal(str(row[4])),
            currency=Currency("EUR"),
            concept=concept,
            category=None,
            tx_metadata={"origin": "CaixaBank Account"},
            tags=[],
        )
Loading

0 comments on commit 23d586c

Please sign in to comment.