From d55f20a422e31e5415206dcc2e0ee0221593b026 Mon Sep 17 00:00:00 2001
From: bosd
Date: Sat, 14 Oct 2023 22:24:16 +0200
Subject: [PATCH] restore stream loader + loader refactor

---
 src/invoice2data/extract/loader.py | 72 +++++++++++++++++++++---------
 tests/test_loader.py               | 63 ++++++++++++++++++++++++--
 2 files changed, 110 insertions(+), 25 deletions(-)

diff --git a/src/invoice2data/extract/loader.py b/src/invoice2data/extract/loader.py
index 80a75e96..77e173f8 100644
--- a/src/invoice2data/extract/loader.py
+++ b/src/invoice2data/extract/loader.py
@@ -6,11 +6,13 @@
 
 import os
 import json
+
 try:
     from yaml import load, YAMLError, CSafeLoader as SafeLoader
 except ImportError:  # pragma: no cover
     from yaml import load, SafeLoader, YAMLError
 import pkg_resources
+
 from logging import getLogger
 from .invoice_template import InvoiceTemplate
 import codecs
@@ -18,6 +20,25 @@
 logger = getLogger(__name__)
 
 
+def ordered_load(stream, Loader=json.loads):
+    """Load InvoiceTemplate objects from a JSON string.
+
+    ``stream`` holds a JSON list of template dicts; each one is normalised by
+    prepare_template(). Returns [] on invalid JSON. ``Loader`` is ignored.
+    """
+    output = []
+    try:
+        tpl_stream = json.loads(stream)
+    except (TypeError, ValueError) as error:
+        logger.warning("json Loader Failed to load template stream\n%s", error)
+        return output
+    for tpl in tpl_stream:
+        tpl = prepare_template(tpl)
+        if tpl:
+            output.append(InvoiceTemplate(tpl))
+    return output
+
+
 def read_templates(folder=None):
     """
     Load yaml templates from template folder. Return list of dicts.
@@ -76,35 +97,42 @@ def read_templates(folder=None):
                     try:
                         tpl = json.loads(template_file.read())
                     except ValueError as error:
-                        logger.warning("json Loader Failed to load %s template:\n%s", name, error)
+                        logger.warning(
+                            "json Loader Failed to load %s template:\n%s", name, error
+                        )
                         continue
             tpl["template_name"] = name
+            tpl = prepare_template(tpl)
 
-            # Test if all required fields are in template
-            if "keywords" not in tpl.keys():
-                logger.warning(
-                    "Failed to load template %s Missing mandatory 'keywords' field.",
-                    name,
-                )
-                continue
+            if tpl:
+                output.append(InvoiceTemplate(tpl))
 
-            # Convert keywords to list, if only one
-            if not isinstance(tpl["keywords"], list):
-                tpl["keywords"] = [tpl["keywords"]]
+    logger.info("Loaded %d templates from %s", len(output), folder)
+    return output
 
-            # Set excluded_keywords as empty list, if not provided
-            if "exclude_keywords" not in tpl.keys():
-                tpl["exclude_keywords"] = []
 
-            # Convert excluded_keywords to list, if only one
-            if not isinstance(tpl["exclude_keywords"], list):
-                tpl["exclude_keywords"] = [tpl["exclude_keywords"]]
+def prepare_template(tpl):
+    """Normalise a template dict in place; return it, or None if 'keywords' is missing."""
+    # Test if all required fields are in template
+    if "keywords" not in tpl.keys():
+        logger.warning(
+            "Failed to load template %s Missing mandatory 'keywords' field.",
+            tpl.get("template_name", "<unnamed>"),
+        )
+        return None
 
-            if "priority" not in tpl.keys():
-                tpl["priority"] = 5
+    # Convert keywords to list, if only one
+    if not isinstance(tpl["keywords"], list):
+        tpl["keywords"] = [tpl["keywords"]]
 
-            output.append(InvoiceTemplate(tpl))
+    # Set excluded_keywords as empty list, if not provided
+    if "exclude_keywords" not in tpl.keys():
+        tpl["exclude_keywords"] = []
 
-    logger.info("Loaded %d templates from %s", len(output), folder)
+    # Convert excluded_keywords to list, if only one
+    if not isinstance(tpl["exclude_keywords"], list):
+        tpl["exclude_keywords"] = [tpl["exclude_keywords"]]
 
-    return output
+    if "priority" not in tpl.keys():
+        tpl["priority"] = 5
+    return tpl
diff --git a/tests/test_loader.py b/tests/test_loader.py
index 0254001d..d3956f00 100644
--- a/tests/test_loader.py
+++ b/tests/test_loader.py
@@ -3,9 +3,10 @@
 import os
 
 import pytest
+import unittest
 
 from invoice2data.extract.invoice_template import InvoiceTemplate
-from invoice2data.extract.loader import read_templates
+from invoice2data.extract.loader import read_templates, ordered_load
 
 
 @pytest.fixture
@@ -30,6 +31,62 @@ def test_default_templates_are_loaded():
     assert all(isinstance(template, InvoiceTemplate) for template in templates)
 
 
+def test_templates_stream_loader():
+    tpl_stream = (
+        '[{"issuer":"first biz", "name": "first template", "department":"purchase", "parser":"static", "value":'
+        ' "NL82338015B01", "keywords": ["Receipt", "va.nl"]}, {"issuer":"second biz", "name": "2nd template",'
+        ' "department":"purchase", "parser":"static", "value": "NL828015B01", "keywords": ["Receipt", "viavia.com"]}]'
+    )
+
+    templates = ordered_load(stream=tpl_stream)
+
+    print("Amount of stream loaded templates %s" % len(templates))
+    assert len(templates) == 2
+    assert all(isinstance(template, InvoiceTemplate) for template in templates)
+
+
+class MyTestCase(unittest.TestCase):
+    def test_templates_invalid_stream_loader(self):
+        invalid_tpl_stream = (
+            ',,,[{"issuer":"first biz", "name": "first template", "department":"purchase", "parser":"static", "value":'
+            ' "NL82338015B01", "keywords": ["Receipt", "va.nl"]}, {"issuer":"second biz", "name": "2nd template",'
+            ' "department":"purchase", "parser":"static", "value": "NL828015B01", "keywords": ["Receipt",'
+            ' "viavia.com"]}]'
+        )
+
+        with self.assertLogs("", level="DEBUG") as cm:
+            ordered_load(stream=invalid_tpl_stream)
+        print(cm.output)
+        self.assertEqual(
+            cm.output,
+            [
+                "WARNING:invoice2data.extract.loader:json Loader Failed to load template stream\nExpecting value: line"
+                " 1 column 1 (char 0)"
+            ],
+        )
+
+
+def test_default_templates_and_stream_loaded():
+    tpl_stream = (
+        '[{"issuer":"first biz", "name": "first template", "department":"purchase", "parser":"static", "value":'
+        ' "NL82338015B01", "keywords": ["Receipt", "va.nl"]}, {"issuer":"second biz", "name": "2nd template",'
+        ' "department":"purchase", "parser":"static", "value": "NL828015B01", "keywords": ["Receipt", "viavia.com"]}]'
+    )
+
+    stream_templates = ordered_load(stream=tpl_stream)
+
+    print("Amount of stream loaded templates %s" % len(stream_templates))
+    templates = read_templates()
+    builtin_tpl_folder = "./src/invoice2data/extract/templates"
+    qty_templ_files = sum(len(files) for _, _, files in os.walk(builtin_tpl_folder))
+
+    print("Amount of default loaded templates %s" % len(templates))
+    templates += stream_templates
+    assert len(templates) == qty_templ_files + 2
+    assert all(isinstance(template, InvoiceTemplate) for template in templates)
+    print(templates)
+
+
 def test_template_with_missing_keywords_is_not_loaded(templatedirectory: Path):
     yamlfile = templatedirectory / "template_with_missing_keywords.yml"
     yamlfile.write_text(template_with_missing_keywords, encoding="utf-8")
@@ -61,7 +118,7 @@ def test_template_with_keyword_is_not_list(templatedirectory: Path):
     yamlfile.write_text(template_keyword_not_list, encoding="utf-8")
 
     tpl = read_templates(str(templatedirectory))
-    assert tpl[0]["keywords"] == ['Basic Test']
+    assert tpl[0]["keywords"] == ["Basic Test"]
 
 
 def test_template_with_exclude_keyword_is_not_list(templatedirectory: Path):
@@ -69,7 +126,7 @@ def test_template_with_exclude_keyword_is_not_list(templatedirectory: Path):
     yamlfile.write_text(template_exclude_keyword_not_list, encoding="utf-8")
 
     tpl = read_templates(str(templatedirectory))
-    assert tpl[0]["exclude_keywords"] == ['Exclude_this']
+    assert tpl[0]["exclude_keywords"] == ["Exclude_this"]
 
 
 def test_template_bad_yaml_format_not_loaded(templatedirectory: Path):