Skip to content

Commit

Permalink
proper testing of data_to_xml and write_file
Browse files Browse the repository at this point in the history
  • Loading branch information
Olthoff231381 committed Sep 24, 2024
1 parent e4e3e56 commit b09e019
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 11 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ dmypy.json
*.xml*
*.html*
!mailcom/test/data/*.eml
!mailcom/test/data/*.xml

# models
test/models
2 changes: 1 addition & 1 deletion mailcom/inout.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def validate_data(self):

def data_to_xml(self, text):
    """Serialize the given data to an XML string rooted at ``<email>``.

    Args:
        text: The data handed unchanged to ``dicttoxml``.
            NOTE(review): despite the name, the visible test passes a
            dict of email fields here - confirm the intended type.

    Returns:
        The decoded XML document as ``str``.
    """
    def content_item_name(_parent):
        # Elements of nested sequences are all emitted under the tag "content".
        return "content"

    xml = dicttoxml(text, custom_root="email", item_func=content_item_name)
    # The dicttoxml result is decoded so callers receive text, not bytes.
    return xml.decode()

def write_file(self, text: str, name: str)-> None:
Expand Down
14 changes: 7 additions & 7 deletions mailcom/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,6 @@ def make_dir(path: str):
# process the text
io = InoutHandler(path_input)
io.list_of_files()
io = InoutHandler(path_input)
io.list_of_files()
# html_files = list_of_files(path_input, "html")
for file in io.email_list:
text = io.get_text(file)
Expand All @@ -133,16 +131,18 @@ def make_dir(path: str):
# print(io.email_content["attachement type"])
# skip this text if email could not be parsed
if not text:
continue
# doc_spacy = nlp_spacy(text)
continue

Check warning on line 134 in mailcom/parse.py

View check run for this annotation

Codecov / codecov/patch

mailcom/parse.py#L134

Added line #L134 was not covered by tests
### nlp = init_spacy(sprache)
# doc_spacy = nlp_spacy(text) ### missing - old version
# text = get_sentences(doc_spacy)
# start with first line
# here you can limit the number of sentences to parse
# newlist = []
# max_i = len(text)
# max_i = len(text) ### removed
### init transformers
# for i in range(0, max_i):
# if tool == "transformers":
# nlps = nlp_transformers(text[i])
# if tool == "transformers": ### there is only one
# nlps = nlp_transformers(text[i]) ### missing, or rather process_doc
# doc = nlps
# newlist.append(process_doc(doc, ner_tool=tool, text=text[i]))
# newlist[i] = " ".join(newlist[i])
Expand Down
1 change: 1 addition & 0 deletions mailcom/test/data/test.out
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<?xml version="1.0" encoding="UTF-8" ?><email><content type="str">This is nothing more than a test</content><date type="str">2024-04-17T15:13:56+00:00</date><attachment type="int">2</attachment><attachement_type type="list"><content type="str">jpg</content></attachement_type></email>
14 changes: 11 additions & 3 deletions mailcom/test/test_inout.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
from pathlib import Path
from importlib import resources
import datetime
import filecmp

pkg = resources.files("mailcom")

FILE_PATH = Path(pkg / "test" / "data" / "Bonjour Agathe.eml")
XML_PATH = Path(pkg / "test" / "data" / "test.out")

TEXT_REF = "J'espère que tu vas bien!"
XML_REF = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?><email><content type=\"str\">"
Expand Down Expand Up @@ -44,13 +46,19 @@ def test_get_text(get_instant):
assert get_instant.email_content["attachement type"] == ['jpg', 'jpg']
with pytest.raises(OSError):
get_instant.get_text(get_instant.directory_name / "nonexisting.eml")
return text

def test_get_html_text(get_instant):
    """Check get_html_text on an HTML document and on plain text."""
    markup = "<html><head><title>Test</title></head></html>"
    plain = "Test"
    # For the HTML document the expected extracted text is its title.
    assert get_instant.get_html_text(markup) == "Test"
    # Input without markup is expected to come back as the same text.
    assert get_instant.get_html_text(plain) == "Test"

def test_data_to_xml(get_instant, tmp_path):
    """Serialize a content dict, write it out and compare with the reference.

    The dict is converted with ``data_to_xml``, written with ``write_file``
    into pytest's temporary directory, and the resulting file is compared
    against the stored reference file ``XML_PATH``.
    """
    xml_content = {
        "content": "This is nothing more than a test",
        "date": "2024-04-17T15:13:56+00:00",
        "attachment": 2,
        # Single-element set; the reference file holds exactly one
        # <content>jpg</content> entry for this field.
        "attachement type": {"jpg"},
    }
    xml = get_instant.data_to_xml(xml_content)
    # NOTE(review): the assertion below relies on write_file appending
    # ".out" to the given name - confirm against write_file.
    get_instant.write_file(xml, tmp_path / "test")
    # shallow=False forces a content comparison instead of trusting
    # os.stat() signatures.
    assert filecmp.cmp(XML_PATH, tmp_path / "test.out", shallow=False)

0 comments on commit b09e019

Please sign in to comment.