Skip to content

Commit

Permalink
importer: Remove requirement for filename and lineno metadata
Browse files Browse the repository at this point in the history
Importers have to add "filename" and "lineno" metadata entries to the
generated entries. However, these are used only by the
beancount.core.data.entry_sortkey() sorting key function when entries
are written to the ledger.

As the Python sort implementation is guaranteed stable, this is at
best not useful and actually problematic when the source document is
sorted in reverse date order and the importer does not explicitly
take this into account.

Implementing a sortkey() function that does not use these metadata
entries removes the need for the importers to generate them and avoids
the issue with reverse sorting of entries for the same day.

Propagate the change to the base classes and example importers.
  • Loading branch information
dnicolodi committed Feb 6, 2022
1 parent 84a3486 commit 1d6532a
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 22 deletions.
21 changes: 19 additions & 2 deletions beangulp/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,23 @@
compare = similar.SimilarityComparator()


def sortkey(entry):
    """Return the key used to order ledger entries: date, then entry type.

    Unlike beancount.core.data.entry_sortkey(), the key leaves out the
    "filename" and "lineno" metadata entries, so importers are not
    required to set them on the entries they generate.

    Args:
      entry: A ledger entry.
    Returns:
      A (date, type order) tuple. Entry types that are not listed in
      data.SORT_ORDER get order 0.
    """
    date = entry.date
    type_order = data.SORT_ORDER.get(type(entry), 0)
    return date, type_order


class Importer(abc.ABC):
"""Interface that all source importers need to comply with.
Expand Down Expand Up @@ -173,7 +190,7 @@ def sort(self, entries: data.Entries, reverse=False) -> None:
to sort in descending order. Importers can implement this
method to have entries serialized to file in a specific
order. The default implementation sorts the entries according
to beancount.core.data.entry_sortkey().
to beangulp.importer.sortkey().
Args:
entries: Entries list to sort.
Expand All @@ -183,7 +200,7 @@ def sort(self, entries: data.Entries, reverse=False) -> None:
None.
"""
return entries.sort(key=data.entry_sortkey, reverse=reverse)
return entries.sort(key=sortkey, reverse=reverse)


class ImporterProtocol:
Expand Down
2 changes: 1 addition & 1 deletion beangulp/importers/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ def get(row, ftype):
currency = get(row, Col.CURRENCY) or self.currency

# Create a transaction
meta = data.new_metadata(file.name, index)
meta = {}
if txn_date is not None:
meta['date'] = parse_date_liberally(txn_date,
self.dateutil_kwds)
Expand Down
16 changes: 5 additions & 11 deletions beangulp/importers/csvbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,10 +277,7 @@ def extract(self, filepath, existing):
balances = defaultdict(list)
default_account = self.account(filepath)

# Compute the line number of the first data line.
offset = int(self.skiplines) + bool(self.names) + 1

for lineno, row in enumerate(self.read(filepath), offset):
for row in self.read(filepath):
# Skip empty lines.
if not row:
continue
Expand All @@ -301,7 +298,7 @@ def extract(self, filepath, existing):
units = data.Amount(row.amount, currency)

# Create a transaction.
txn = data.Transaction(self.metadata(filepath, lineno, row),
txn = data.Transaction(self.metadata(row),
row.date, flag, payee, row.narration, tags, links, [
data.Posting(account, units, None, None, None, None),
])
Expand All @@ -317,8 +314,7 @@ def extract(self, filepath, existing):
if balance is not None:
date = row.date + datetime.timedelta(days=1)
units = data.Amount(balance, currency)
meta = data.new_metadata(filepath, lineno)
balances[currency].append(data.Balance(meta, date, account, units, None, None))
balances[currency].append(data.Balance({}, date, account, units, None, None))

if not entries:
return []
Expand All @@ -333,22 +329,20 @@ def extract(self, filepath, existing):

return entries

def metadata(self, filepath, lineno, row):
def metadata(self, row):
"""Build transaction metadata dictionary.
This method can be extended to add customized metadata
entries based on the content of the data row.
Args:
filepath: Path to the file being imported.
lineno: Line number of the data being processed.
row: The data row being processed.
Returns:
A metadata dictionary.
"""
return data.new_metadata(filepath, lineno)
return {}

def finalize(self, txn, row):
"""Post process the transaction.
Expand Down
4 changes: 2 additions & 2 deletions beangulp/importers/csvbase_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,8 @@ class CSVImporter(Base):
data = Amount(4)
names = False

def metadata(self, filepath, lineno, row):
meta = super().metadata(filepath, lineno, row)
def metadata(self, row):
meta = super().metadata(row)
for field in 'meta', 'data':
meta[field] = getattr(row, field)
return meta
Expand Down
8 changes: 2 additions & 6 deletions examples/importers/ofx.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ def extract(soup, filename, acctid_regexp, account, flag, balance_type):
A sorted list of entries.
"""
new_entries = []
counter = itertools.count()
for acctid, currency, transactions, balance in find_statement_transactions(soup):
if not re.match(acctid_regexp, acctid):
continue
Expand All @@ -122,7 +121,6 @@ def extract(soup, filename, acctid_regexp, account, flag, balance_type):
stmt_entries = []
for stmttrn in transactions:
entry = build_transaction(stmttrn, flag, account, currency)
entry = entry._replace(meta=data.new_metadata(filename, next(counter)))
stmt_entries.append(entry)
stmt_entries = data.sorted(stmt_entries)
new_entries.extend(stmt_entries)
Expand All @@ -137,8 +135,7 @@ def extract(soup, filename, acctid_regexp, account, flag, balance_type):
# it to the following day.
date += datetime.timedelta(days=1)

meta = data.new_metadata(filename, next(counter))
balance_entry = data.Balance(meta, date, account,
balance_entry = data.Balance(None, date, account,
amount.Amount(number, currency),
None, None)
new_entries.append(balance_entry)
Expand Down Expand Up @@ -297,6 +294,5 @@ def build_transaction(stmttrn, flag, account, currency):
posting = data.Posting(account, units, None, None, None, None)

# Build the transaction with a single leg.
fileloc = data.new_metadata('<build_transaction>', 0)
return data.Transaction(fileloc, date, flag, payee, narration,
return data.Transaction({}, date, flag, payee, narration,
data.EMPTY_SET, data.EMPTY_SET, [posting])

0 comments on commit 1d6532a

Please sign in to comment.