-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from blalop/master
Release 1.0.0: Refactoring of the API
- Loading branch information
Showing
18 changed files
with
177 additions
and
126 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from bbva2pandas.report import Report |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import re | ||
|
||
import pandas as pd | ||
|
||
# Column names applied, in order, to the fields of each movement row
# handed to build_dataframe().
DF_COLUMNS = [
    'date',
    'value_date',
    'concept',
    'amount',
    'balance',
    'card',
    'subconcept',
]
|
||
|
||
def _trim_string(col: pd.Series) -> pd.Series: | ||
"""Remove unnecesary whitespaces""" | ||
return col.apply(lambda x: re.sub(r'\s+', ' ', x).strip()) | ||
|
||
|
||
def _transform_decimal_separator(col: pd.Series) -> pd.Series: | ||
"""Parses the decimal separator from ',' to '.'""" | ||
col = col.apply(lambda x: x.replace('.', '').replace(',', '.')) | ||
return pd.to_numeric(col) | ||
|
||
|
||
def _format_date(col: pd.Series, year: str) -> pd.Series: | ||
"""Formats the date in Pandas format""" | ||
return pd.to_datetime(col + '/' + year, dayfirst=True) | ||
|
||
|
||
def build_dataframe(movements: list, year: str) -> pd.DataFrame:
    """Assemble the movements into a cleaned DataFrame.

    The rows are loaded under the DF_COLUMNS names, then each column
    family is normalised in place: text columns are whitespace-trimmed,
    date columns are converted to timestamps for *year*, and money
    columns are converted to numbers.
    """
    frame = pd.DataFrame(movements, columns=DF_COLUMNS)

    for text_col in ('concept', 'subconcept'):
        frame[text_col] = _trim_string(frame[text_col])

    for date_col in ('date', 'value_date'):
        frame[date_col] = _format_date(frame[date_col], year)

    for money_col in ('amount', 'balance'):
        frame[money_col] = _transform_decimal_separator(frame[money_col])

    return frame
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import re | ||
|
||
# Statement header of the form "EXTRACTO DE <word> <year>"; the single
# capture group is the four-digit year.
YEAR_FIND_REGEX = re.compile(r'EXTRACTO DE \w* (\d{4})', re.MULTILINE)

# One movement per line (MULTILINE anchors ^/$ to each line). The seven
# capture groups are, in order: date, value date, concept, amount,
# balance, card number and subconcept.
#
# The thousands separator in the amount and balance groups is an escaped
# literal dot. Unescaped it would match any character, letting values
# such as "1x0,00" through as amounts.
MOVEMENTS_PARSE_REGEX = re.compile(
    r'''^
    (\d\d/\d\d)                            # date
    \s
    (\d\d/\d\d)                            # value date
    \s*
    ([A-ZÑÁÉÍÓÚÜ\'\,\.\:\s]+)              # concept
    \s*
    (-?\d*\.?\d*,\d*)                      # amount of the movement
    \s*
    (\d*\.?\d*,\d*)                        # balance after movement
    \s*
    (\d*)                                  # credit card number
    \s*
    ([\d\wÑÁÉÍÓÚÜ \.\,\:\*\'\-\/\(\)]*)    # subconcept
    $''',
    re.MULTILINE | re.IGNORECASE | re.VERBOSE
)
|
||
|
||
def find_movements(content: str) -> list:
    """Return every movement found in *content*.

    Each entry is the tuple of groups captured by MOVEMENTS_PARSE_REGEX
    for one matching line; the list is empty when nothing matches.
    """
    rows = MOVEMENTS_PARSE_REGEX.findall(content)
    return rows
|
||
|
||
def find_year(content: str) -> str:
    """Extract the statement year from the file content.

    Args:
        content: Full text of the statement.

    Returns:
        The year captured by the first YEAR_FIND_REGEX match, as a string.

    Raises:
        IndexError: If no statement header is present. The exception
            type is the one the previous ``findall(...)[0]`` lookup
            raised; it now carries a descriptive message.
    """
    header = YEAR_FIND_REGEX.search(content)
    if header is None:
        raise IndexError(
            'statement year not found: no "EXTRACTO DE <month> <year>" header'
        )
    return header.group(1)
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import pdftotext | ||
|
||
from bbva2pandas.typing import FilePathOrBuffer | ||
|
||
|
||
def read_pdf(filepath: FilePathOrBuffer) -> str:
    """Read a PDF and return its text, pages joined by newlines.

    Args:
        filepath: Either a filesystem path (``str`` or any
            ``os.PathLike`` such as ``pathlib.Path``) or an already
            opened file object. Callers in this project open the file in
            binary mode (``'rb'``).

    Returns:
        The text of all pages, joined with a newline.
    """
    import os  # local import: only needed for the path-like check

    if isinstance(filepath, (str, os.PathLike)):
        # Paths are opened here so the handle is always closed again.
        with open(filepath, 'rb') as f:
            return '\n'.join(pdftotext.PDF(f))
    # Anything else is treated as an open file object owned by the caller.
    return '\n'.join(pdftotext.PDF(filepath))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
import pandas as pd | ||
|
||
from bbva2pandas.extractor import find_movements, find_year | ||
from bbva2pandas.pdf import read_pdf | ||
from bbva2pandas.dataframe import build_dataframe | ||
from bbva2pandas.typing import FilePathOrBuffer | ||
|
||
|
||
class Report:
    """A bank statement loaded from a PDF.

    On construction the PDF text is read and stored in ``content``, the
    statement year in ``year`` and the parsed movement rows in
    ``movements``.
    """

    def __init__(self, filepath: FilePathOrBuffer) -> None:
        """Read *filepath* and extract its year and movements."""
        text = read_pdf(filepath)
        self.content = text
        self.year = find_year(text)
        self.movements = find_movements(text)

    def to_df(self) -> pd.DataFrame:
        """Build a DataFrame from the movements and year parsed at construction."""
        return build_dataframe(self.movements, self.year)
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
import io | ||
import typing | ||
|
||
# Accepted inputs for reading a PDF: a path string or an open binary file
# object. The buffer side is BinaryIO rather than a text wrapper because
# the PDF is always opened with mode 'rb'.
FilePathOrBuffer = typing.Union[str, typing.BinaryIO]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import unittest | ||
|
||
import pandas as pd | ||
from pandas._libs.tslibs.timestamps import Timestamp | ||
|
||
from bbva2pandas import dataframe | ||
|
||
|
||
class TestDataframe(unittest.TestCase):
    """Tests for the column-cleaning helpers of ``bbva2pandas.dataframe``."""

    def test_trim_string(self):
        """Trailing whitespace is removed; clean values are untouched."""
        series = pd.Series(['a', 'a b ', 'a b c'])
        result = dataframe._trim_string(series).to_list()
        self.assertListEqual(['a', 'a b', 'a b c'], result)

    def test_transform_decimal_separator(self):
        """Comma decimals and dot thousands separators become numbers."""
        series = pd.Series(['23,45', '1.500,00'])
        result = dataframe._transform_decimal_separator(series).to_list()
        self.assertListEqual([23.45, 1500], result)

    def test_format_date(self):
        """Day/month strings are combined with the year, day first."""
        series = pd.Series(['12/10', '1/1'])
        result = dataframe._format_date(series, '2020').to_list()
        wanted = [
            pd.Timestamp('2020-10-12 00:00:00'),
            pd.Timestamp('2020-01-01 00:00:00'),
        ]
        self.assertListEqual(wanted, result)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import unittest | ||
|
||
from bbva2pandas import extractor | ||
|
||
|
||
class TestExtractor(unittest.TestCase):
    """Tests for the regex-based extraction in ``bbva2pandas.extractor``."""

    # Text fixture holding the content of a sample statement.
    CONTENT_PATH = 'tests/data/pdf-content.txt'

    def _read_content(self) -> str:
        """Return the fixture text (shared by every test in this class)."""
        with open(self.CONTENT_PATH) as f:
            return f.read()

    def test_year_extraction(self):
        """The year in the statement header is returned as a string."""
        # Named `content`, not `input`, to avoid shadowing the builtin.
        content = self._read_content()
        year = extractor.find_year(content)
        self.assertEqual('2050', year)

    def test_movements_extraction(self):
        """The fixture holds exactly one movement with the expected fields."""
        content = self._read_content()
        movements = extractor.find_movements(content)
        expected = [('05/08', '05/08', 'TRANSFERENCIAS ', '42,00', '42,00', '', 'X')]
        self.assertEqual(1, len(movements))
        self.assertEqual(expected, movements)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import unittest | ||
|
||
from bbva2pandas import pdf | ||
|
||
|
||
class TestExtractor(unittest.TestCase):
    """Tests for ``pdf.read_pdf`` with both supported input kinds."""

    FILEPATH = 'tests/data/abcdef.pdf'

    def test_with_file_open(self):
        """An already opened binary file object is accepted."""
        with open(self.FILEPATH, 'rb') as handle:
            text = pdf.read_pdf(handle)
        self.assertEqual('abcdef', text)

    def test_with_file_path(self):
        """A path string is opened and read by read_pdf itself."""
        text = pdf.read_pdf(self.FILEPATH)
        self.assertEqual('abcdef', text)
This file was deleted.
Oops, something went wrong.