Skip to content

Commit

Permalink
restore stream loader + loader refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
bosd committed Oct 14, 2023
1 parent 3807bce commit d55f20a
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 25 deletions.
72 changes: 50 additions & 22 deletions src/invoice2data/extract/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,39 @@

import os
import json

try:
from yaml import load, YAMLError, CSafeLoader as SafeLoader
except ImportError: # pragma: no cover
from yaml import load, SafeLoader, YAMLError
import pkg_resources

from logging import getLogger
from .invoice_template import InvoiceTemplate
import codecs

logger = getLogger(__name__)


def ordered_load(stream, Loader=json.loads):
    """Load invoice templates from a serialized stream.

    Args:
        stream: Serialized template data. With the default loader this is a
            JSON string holding a list of template objects; a single
            template object is accepted as well.
        Loader: Callable used to parse ``stream``. Defaults to
            ``json.loads``.

    Returns:
        A list of ``InvoiceTemplate`` objects, one per template for which
        ``prepare_template`` returns a truthy value. The list is empty when
        the stream cannot be parsed, so callers can always iterate or
        concatenate the result.
    """
    try:
        # Honour the caller-supplied parser instead of hard-coding json.loads.
        tpl_stream = Loader(stream)
    except ValueError as error:
        logger.warning("json Loader Failed to load template stream\n%s", error)
        return []

    # A lone template object would otherwise be iterated key by key.
    if isinstance(tpl_stream, dict):
        tpl_stream = [tpl_stream]

    output = []
    # Always pre-process templates to remain backwards compatible.
    for tpl in tpl_stream:
        tpl = prepare_template(tpl)
        if tpl:
            output.append(InvoiceTemplate(tpl))

    return output


def read_templates(folder=None):
"""
Load yaml templates from template folder. Return list of dicts.
Expand Down Expand Up @@ -76,35 +97,42 @@ def read_templates(folder=None):
try:
tpl = json.loads(template_file.read())
except ValueError as error:
logger.warning("json Loader Failed to load %s template:\n%s", name, error)
logger.warning(
"json Loader Failed to load %s template:\n%s", name, error
)
continue
tpl["template_name"] = name
tpl = prepare_template(tpl)

# Test if all required fields are in template
if "keywords" not in tpl.keys():
logger.warning(
"Failed to load template %s Missing mandatory 'keywords' field.",
name,
)
continue
if tpl:
output.append(InvoiceTemplate(tpl))

# Convert keywords to list, if only one
if not isinstance(tpl["keywords"], list):
tpl["keywords"] = [tpl["keywords"]]
logger.info("Loaded %d templates from %s", len(output), folder)
return output

# Set excluded_keywords as empty list, if not provided
if "exclude_keywords" not in tpl.keys():
tpl["exclude_keywords"] = []

# Convert excluded_keywords to list, if only one
if not isinstance(tpl["exclude_keywords"], list):
tpl["exclude_keywords"] = [tpl["exclude_keywords"]]
def prepare_template(tpl):
    """Validate a raw template dict and fill in its defaults in place.

    Args:
        tpl: Template mapping as parsed from a template file or stream.

    Returns:
        The same ``tpl`` object after normalization, or ``None`` when the
        mandatory ``keywords`` field is missing (a warning is logged).

    Normalization applied:
        * ``keywords`` is wrapped in a list when it is a single value.
        * ``exclude_keywords`` defaults to ``[]`` and is wrapped in a list
          when it is a single value.
        * ``priority`` defaults to ``5``.
    """
    # Test if all required fields are in template
    if "keywords" not in tpl:
        logger.warning(
            "Failed to load template %s Missing mandatory 'keywords' field.",
            # Templates loaded from a stream carry no "template_name" key;
            # indexing it directly would raise KeyError while logging.
            tpl.get("template_name", "<unnamed>"),
        )
        return None

    # Convert keywords to list, if only one
    if not isinstance(tpl["keywords"], list):
        tpl["keywords"] = [tpl["keywords"]]

    # Set excluded_keywords as empty list, if not provided
    if "exclude_keywords" not in tpl:
        tpl["exclude_keywords"] = []

    # Convert excluded_keywords to list, if only one
    if not isinstance(tpl["exclude_keywords"], list):
        tpl["exclude_keywords"] = [tpl["exclude_keywords"]]

    if "priority" not in tpl:
        tpl["priority"] = 5
    return tpl
63 changes: 60 additions & 3 deletions tests/test_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@

import os
import pytest
import unittest

from invoice2data.extract.invoice_template import InvoiceTemplate
from invoice2data.extract.loader import read_templates
from invoice2data.extract.loader import read_templates, ordered_load


@pytest.fixture
Expand All @@ -30,6 +31,62 @@ def test_default_templates_are_loaded():
assert all(isinstance(template, InvoiceTemplate) for template in templates)


def test_templates_stream_loader():
    """A stream with two valid templates yields two InvoiceTemplate objects."""
    json_payload = (
        '[{"issuer":"first biz", "name": "first template", "department":"purchase", "parser":"static", "value":'
        ' "NL82338015B01", "keywords": ["Receipt", "va.nl"]}, {"issuer":"second biz", "name": "2nd template",'
        ' "department":"purchase", "parser":"static", "value": "NL828015B01", "keywords": ["Receipt", "viavia.com"]}]'
    )

    loaded = ordered_load(stream=json_payload)
    loaded_count = len(loaded)

    print("Amount of stream loaded templates %s" % loaded_count)
    assert loaded_count == 2
    # Every entry must have been wrapped by the loader.
    for entry in loaded:
        assert isinstance(entry, InvoiceTemplate)


class MyTestCase(unittest.TestCase):
    """Log-capturing tests for the stream loader."""

    def test_templates_invalid_stream_loader(self):
        """A stream that is not valid JSON logs exactly one loader warning."""
        # The leading commas make the payload unparseable from the first character.
        malformed_stream = (
            ',,,[{"issuer":"first biz", "name": "first template", "department":"purchase", "parser":"static", "value":'
            ' "NL82338015B01", "keywords": ["Receipt", "va.nl"]}, {"issuer":"second biz", "name": "2nd template",'
            ' "department":"purchase", "parser":"static", "value": "NL828015B01", "keywords": ["Receipt",'
            ' "viavia.com"]}]'
        )
        expected_records = [
            "WARNING:invoice2data.extract.loader:json Loader Failed to load template stream\nExpecting value: line"
            " 1 column 1 (char 0)"
        ]

        with self.assertLogs("", level="DEBUG") as captured:
            ordered_load(stream=malformed_stream)

        print(captured.output)
        self.assertEqual(captured.output, expected_records)


def test_default_templates_and_stream_loaded():
    """Built-in templates plus two stream templates load into one combined list."""
    json_payload = (
        '[{"issuer":"first biz", "name": "first template", "department":"purchase", "parser":"static", "value":'
        ' "NL82338015B01", "keywords": ["Receipt", "va.nl"]}, {"issuer":"second biz", "name": "2nd template",'
        ' "department":"purchase", "parser":"static", "value": "NL828015B01", "keywords": ["Receipt", "viavia.com"]}]'
    )

    from_stream = ordered_load(stream=json_payload)
    print("Amount of stream loaded templates %s" % len(from_stream))

    templates = read_templates()

    # Count every file below the built-in template folder.
    builtin_dir = "./src/invoice2data/extract/templates"
    file_total = 0
    for _root, _dirs, filenames in os.walk(builtin_dir):
        file_total += len(filenames)

    print("Amount of default loaded templates %s" % len(templates))

    templates += from_stream
    assert len(templates) == file_total + 2
    for entry in templates:
        assert isinstance(entry, InvoiceTemplate)
    print(templates)


def test_template_with_missing_keywords_is_not_loaded(templatedirectory: Path):
yamlfile = templatedirectory / "template_with_missing_keywords.yml"
yamlfile.write_text(template_with_missing_keywords, encoding="utf-8")
Expand Down Expand Up @@ -61,15 +118,15 @@ def test_template_with_keyword_is_not_list(templatedirectory: Path):
yamlfile.write_text(template_keyword_not_list, encoding="utf-8")

tpl = read_templates(str(templatedirectory))
assert tpl[0]["keywords"] == ['Basic Test']
assert tpl[0]["keywords"] == ["Basic Test"]


def test_template_with_exclude_keyword_is_not_list(templatedirectory: Path):
yamlfile = templatedirectory / "excludekeywordnotlist.yml"
yamlfile.write_text(template_exclude_keyword_not_list, encoding="utf-8")

tpl = read_templates(str(templatedirectory))
assert tpl[0]["exclude_keywords"] == ['Exclude_this']
assert tpl[0]["exclude_keywords"] == ["Exclude_this"]


def test_template_bad_yaml_format_not_loaded(templatedirectory: Path):
Expand Down

0 comments on commit d55f20a

Please sign in to comment.