-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_suite.py
178 lines (155 loc) · 8.43 KB
/
test_suite.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import pytest
from prisma_automator.collector import Collector
from prisma_automator.splitter import Splitter
class TestSplitter:
    """Tests for the Splitter class: keyword groups, keyword graph,
    keyword combinations and the final search-string splits.
    """

    @pytest.fixture
    def create_splitter(self):
        """Attach a newly constructed Splitter to the test instance."""
        self.splitter = Splitter()

    @pytest.fixture
    def create_splitter_with_kwgroups(self, create_splitter):
        """Attach a Splitter that already holds three keyword groups."""
        self.kw_groups = [
            ["Operations Research", "Heuristics"],
            ["Flexible", "Matrix", "Reconfigurable"],
            ["Assembly"],
        ]
        self.splitter.add_kwgroups(self.kw_groups)

    def test_add_kwgroup(self, create_splitter):
        """A single added group is stored as the first entry of kw_groups."""
        group = ["Operations Research", "Heuristics"]
        self.splitter.add_kwgroup(group)
        assert self.splitter.kw_groups[0] == group

    def test_add_kwgroups(self, create_splitter):
        """Several groups added at once are stored unchanged and in order."""
        groups = [
            ["Operations Research", "Heuristics"],
            ["Flexible", "Matrix", "Reconfigurable"],
            ["Assembly"],
        ]
        self.splitter.add_kwgroups(groups)
        assert self.splitter.kw_groups == groups

    def test_generate_kwgraph(self, create_splitter_with_kwgroups):
        """Each keyword maps to the keywords of the following group.

        The empty string is the root key pointing at the first group, and
        the keywords of the last group map to an empty list.
        """
        second_group = ["Flexible", "Matrix", "Reconfigurable"]
        last_group = ["Assembly"]
        expected_graph = {
            "": ["Operations Research", "Heuristics"],
            "Operations Research": second_group,
            "Heuristics": second_group,
            "Flexible": last_group,
            "Matrix": last_group,
            "Reconfigurable": last_group,
            "Assembly": [],
        }
        assert self.splitter.generate_kwgraph() == expected_graph

    def test_generate_combinations(self, create_splitter_with_kwgroups):
        """Walking the graph from the root key yields one keyword per group,
        in the order asserted below.
        """
        expected_combinations = [
            ["Operations Research", "Flexible", "Assembly"],
            ["Operations Research", "Matrix", "Assembly"],
            ["Operations Research", "Reconfigurable", "Assembly"],
            ["Heuristics", "Flexible", "Assembly"],
            ["Heuristics", "Matrix", "Assembly"],
            ["Heuristics", "Reconfigurable", "Assembly"],
        ]
        kw_graph = self.splitter.generate_kwgraph()
        # Positional arguments: working combination, accumulated results,
        # the keyword graph and the root key ('').
        found = self.splitter.generate_combinations([], [], kw_graph, "")
        assert found == expected_combinations

    def test_split(self, create_splitter_with_kwgroups):
        """Every combination becomes a string of quoted keywords joined by AND."""
        expected_splits = [
            '"Operations Research" AND "Flexible" AND "Assembly"',
            '"Operations Research" AND "Matrix" AND "Assembly"',
            '"Operations Research" AND "Reconfigurable" AND "Assembly"',
            '"Heuristics" AND "Flexible" AND "Assembly"',
            '"Heuristics" AND "Matrix" AND "Assembly"',
            '"Heuristics" AND "Reconfigurable" AND "Assembly"',
        ]
        produced = self.splitter.split(log=False, save_to="")
        # Order is not part of the contract here, so compare sorted lists.
        assert sorted(produced) == sorted(expected_splits)

    def test_split_empty_string_kw(self, create_splitter_with_kwgroups):
        """An empty-string keyword makes its group optional.

        With the extra group ["Digital Twin", ""], every base split is
        expected both without and with a trailing "Digital Twin" term.
        """
        without_extra_kw = [
            '"Operations Research" AND "Flexible" AND "Assembly"',
            '"Operations Research" AND "Matrix" AND "Assembly"',
            '"Operations Research" AND "Reconfigurable" AND "Assembly"',
            '"Heuristics" AND "Flexible" AND "Assembly"',
            '"Heuristics" AND "Matrix" AND "Assembly"',
            '"Heuristics" AND "Reconfigurable" AND "Assembly"',
        ]
        with_extra_kw = [
            base + ' AND "Digital Twin"' for base in without_extra_kw
        ]
        expected_splits = without_extra_kw + with_extra_kw

        self.splitter.add_kwgroup(["Digital Twin", ""])
        produced = self.splitter.split(log=False, save_to="")
        assert sorted(produced) == sorted(expected_splits)

    def test_split_kw_with_OR_operator(self):
        """A keyword containing '||' is rendered as a parenthesised OR term."""
        self.kw_groups = [
            ["Operations Research", "Heuristics"],
            ["Flexible", "Flexible || Matrix"],
        ]
        self.splitter = Splitter()
        self.splitter.add_kwgroups(self.kw_groups)

        expected_splits = [
            '"Operations Research" AND "Flexible"',
            '"Operations Research" AND ("Flexible" OR "Matrix")',
            '"Heuristics" AND "Flexible"',
            '"Heuristics" AND ("Flexible" OR "Matrix")',
        ]
        produced = self.splitter.split(log=False, save_to="")
        assert sorted(produced) == sorted(expected_splits)
class TestCollector:
    """Tests for the Collector class.

    - NOTE: these tests take longer to complete because they make API calls.
      The results should be saved to memory after the first run.
    """

    @pytest.fixture
    def create_collector_and_splits(self):
        """Attach a Collector and a small list of splits to the test instance.

        The chosen splits return at least 1 result, but always very few, so
        that the search can be executed quickly. The number of results may
        change in the future, but this is the best way found to test the
        Collector's methods. As of 02.12.21, total results are
        2 + 2 + 23 = 27.

        - NOTE: one split is repeated on purpose, to exercise the
          remove-duplicates functionality of the screen() method.
        """
        self.collector = Collector()
        self.splits = [
            '"Digital Twin" AND "Augmented Reality"',
            '"Digital Twin" AND "Augmented Reality"',
            '"Virtual Reality" AND "BCI" AND "Gaming"',
        ]

    def _search_results(self):
        """Search with the fixture's splits and return the first element of
        what search() gives back (the tests treat it as a DataFrame).
        """
        search_output = self.collector.search(
            splits=self.splits, subscriber=False, threshold=1000, log=False)
        return search_output[0]

    def _screened_results(self):
        """Return the search results after passing them through screen()."""
        return self.collector.screen(df=self._search_results(), log=False)

    def test_search_results_df_has_all_expected_columns(self, create_collector_and_splits):
        """The search results contain exactly the expected columns."""
        # NOTE: "splits" column is added by the search method.
        expected_columns = [
            'splits', 'eid', 'doi', 'pii', 'pubmed_id', 'title', 'subtype',
            'subtypeDescription', 'creator', 'afid', 'affilname',
            'affiliation_city', 'affiliation_country', 'author_count',
            'author_names', 'author_ids', 'author_afids', 'coverDate',
            'coverDisplayDate', 'publicationName', 'issn', 'source_id',
            'eIssn', 'aggregationType', 'volume', 'issueIdentifier',
            'article_number', 'pageRange', 'description', 'authkeywords',
            'citedby_count', 'openaccess', 'freetoread', 'freetoreadLabel',
            'fund_acr', 'fund_no', 'fund_sponsor',
        ]
        results_df = self._search_results()
        # Every expected column is present ...
        assert all(name in results_df for name in expected_columns)
        # ... and there are no additional ones.
        assert len(results_df.columns) == len(expected_columns)

    def test_screen_dropped_unnecessary_columns(self, create_collector_and_splits):
        """After screen(), only the expected subset of columns remains."""
        expected_columns = [
            'splits', 'doi', 'title', 'subtypeDescription', 'creator',
            'author_names', 'coverDate', 'volume', 'issueIdentifier',
            'pageRange', 'description', 'authkeywords', 'citedby_count',
            'openaccess', 'freetoread', 'freetoreadLabel',
        ]
        screened_df = self._screened_results()
        # Every expected column is present ...
        assert all(name in screened_df for name in expected_columns)
        # ... and there are no additional ones.
        assert len(screened_df.columns) == len(expected_columns)

    def test_screen_removed_conference_reviews(self, create_collector_and_splits):
        """No screened row has the subtype description 'Conference Review'."""
        screened_df = self._screened_results()
        remaining_subtypes = screened_df.subtypeDescription.unique()
        assert 'Conference Review' not in remaining_subtypes

    def test_screen_removed_duplicates(self, create_collector_and_splits):
        """
        Duplicates in this test are checked with both "doi" and "title" columns.
        """
        screened_df = self._screened_results()
        duplicate_mask = screened_df.duplicated(subset=['doi', 'title'])
        assert not duplicate_mask.any()