-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
updated README; moved to v.1.6.3; polished some tests
- Loading branch information
1 parent f4f33bc, commit ffe60e3
Showing 7 changed files with 90 additions and 260 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
# -*- coding: utf-8 -*- | ||
__version__ = '1.6.2' | ||
__version__ = '1.6.3' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,12 @@ | ||
"""Tests for the module `citation_extractor.core`.""" | ||
# -*- coding: utf-8 -*- | ||
# author: Matteo Romanello, [email protected] | ||
|
||
import pytest | ||
import pdb | ||
import pkg_resources | ||
import logging | ||
import pandas as pd | ||
from pytest import fixture | ||
from citation_extractor.Utils.IO import * | ||
from citation_extractor.Utils.IO import annotations2references | ||
from citation_extractor.Utils.IO import load_brat_data | ||
from citation_extractor.Utils.strmatching import StringUtils | ||
|
||
logging.basicConfig(level=logging.INFO) | ||
|
@@ -19,35 +18,66 @@ | |
|
||
|
||
def test_annotations2references(knowledge_base): | ||
datadir = ('citation_extractor','data/aph_corpus/goldset/ann/') | ||
datadir = ('citation_extractor', 'data/aph_corpus/goldset/ann/') | ||
dir = pkg_resources.resource_filename(*datadir) | ||
files = [file.replace('-doc-1.ann','') for file in pkg_resources.resource_listdir(*datadir) if '.ann' in file] | ||
all_annotations = [annotations2references(file, dir, knowledge_base) for file in files] | ||
files = [ | ||
file.replace('-doc-1.ann', '') | ||
for file in pkg_resources.resource_listdir(*datadir) | ||
if '.ann' in file | ||
] | ||
all_annotations = [ | ||
annotations2references(file, dir, knowledge_base) | ||
for file in files[:50] | ||
] | ||
references = reduce((lambda x, y: x + y), all_annotations) | ||
assert references is not None | ||
|
||
#def test_sort_entities(): #TODO implement | ||
# raise NotImplementedError | ||
|
||
#@pytest.mark.skip | ||
def test_load_brat_data(crf_citation_extractor, knowledge_base, postaggers, aph_test_ann_files, aph_titles): | ||
def test_load_brat_data( | ||
crfsuite_citation_extractor, | ||
knowledge_base, postaggers, | ||
aph_test_ann_files, | ||
aph_titles | ||
): | ||
assert crfsuite_citation_extractor is not None | ||
# load the pandas.DataFrame | ||
dataframe = load_brat_data(crf_citation_extractor, knowledge_base, postaggers, aph_test_ann_files, aph_titles) | ||
assert dataframe is not None and type(dataframe)==type(pd.DataFrame()) and dataframe.shape[0]>0 | ||
dataframe = load_brat_data( | ||
crfsuite_citation_extractor, | ||
knowledge_base, | ||
postaggers, | ||
aph_test_ann_files, | ||
aph_titles | ||
) | ||
assert dataframe is not None | ||
assert isinstance(dataframe, pd.DataFrame) | ||
assert dataframe.shape[0] > 0 | ||
|
||
##################### | ||
# Utils.strmatching # | ||
##################### | ||
|
||
def test_utils_stringutils(): | ||
|
||
def test_utils_stringutils(): | ||
strings = [ | ||
("de", u"Wie seine Vorgänger verfolgt auch Ammianus die didaktische Absicht,") | ||
, ("en", u"Judgement of Paris, with actors playing the bribing goddesses, at the end of Book 10 (11, 3-5 : cf. 10, 30-31).") | ||
, ("it", u"Superior e databili tra l'età augustea e il 5° sec. : AE 1952, 16 ; CIL 13, 8648 = ILS 2244 ; AE 1938, 120 ;") | ||
( | ||
"de", | ||
u"Wie seine Vorgänger verfolgt auch\ | ||
Ammianus die didaktische Absicht," | ||
), | ||
( | ||
"en", | ||
u"Judgement of Paris, with actors playing the bribing goddesses,\ | ||
at the end of Book 10 (11, 3-5 : cf. 10, 30-31)." | ||
), | ||
( | ||
"it", | ||
u"Superior e databili tra l'età augustea e il 5° sec. : AE 1952,\ | ||
16 ; CIL 13, 8648 = ILS 2244 ; AE 1938, 120 ;" | ||
) | ||
] | ||
|
||
for language, text in strings: | ||
normalized_text = StringUtils.normalize(text) | ||
normalized_text = StringUtils.normalize(text, language) | ||
normalized_text = StringUtils.normalize(text, language, keep_dots=True) | ||
assert normalized_text is not None |
Oops, something went wrong.