forked from pr0kary0te/minimalmarkers
-
Notifications
You must be signed in to change notification settings - Fork 3
/
test_minimalmarkers.py
89 lines (70 loc) · 2.81 KB
/
test_minimalmarkers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from minimalmarkers import load_patterns, find_best_patterns, \
calculate_best_possible_score, \
get_unresolved, Patterns
def test_minimal_markers():
    """Validate that example/AppleGenotypes.csv is loaded and processed correctly.

    The test checks, in order:

    * the object returned by ``load_patterns`` is a ``Patterns`` with the
      expected number of varieties, ids, mafs and the expected matrix shape;
    * ``calculate_best_possible_score`` returns the known best score;
    * the last entry returned by ``find_best_patterns`` reaches that score;
    * the first nine entries match the known reference values exactly;
    * ``get_unresolved`` reports the one known unresolved pair of varieties.
    """
    patterns = load_patterns("example/AppleGenotypes.csv")

    assert patterns is not None
    # isinstance() rather than an exact type() comparison, so a subclass of
    # Patterns would also be accepted.
    assert isinstance(patterns, Patterns)

    n_varieties = 260
    n_patterns = 1269

    assert len(patterns.varieties) == n_varieties
    assert len(patterns.ids) == n_patterns
    assert len(patterns.mafs) == n_patterns
    assert patterns.patterns.shape == (n_patterns, n_varieties)

    best_score = calculate_best_possible_score(patterns)
    assert best_score == 33669

    best_patterns = find_best_patterns(patterns)

    # Reference result. Each entry is compared element-wise against the
    # corresponding entry of best_patterns; element [1] is the score
    # (presumably cumulative, and element [0] the selected pattern's
    # index - confirm against find_best_patterns).
    correct_result = [
        (610, 21931),
        (763, 29506),
        (718, 32113),
        (915, 33083),
        (468, 33445),
        (352, 33570),
        (567, 33609),
        (786, 33629),
        (416, 33641),
        (933, 33647),
        (1222, 33652),
        (421, 33656),
        (1097, 33659),
        (191, 33661),
        (961, 33663),
        (358, 33664),
        (436, 33665),
        (296, 33666),
        (888, 33667),
        (490, 33668),
        (351, 33669),
    ]

    # The top 9 are pretty stable
    n_stable = 9

    # Fail with a readable assertion (instead of an IndexError further
    # down) if fewer entries than the stable prefix were returned.
    assert len(best_patterns) >= n_stable

    # The score from the last pattern must give the best
    # possible score
    assert best_patterns[-1][1] == best_score

    for found, expected in zip(best_patterns, correct_result[:n_stable]):
        assert found[1] == expected[1]
        assert found[0] == expected[0]

    # This last test is a bit fragile - it is possible that a slightly
    # different ordering can lead to a different number of patterns
    # found - the algorithm is inherently non-reproducible for the
    # later patterns that are found. This doesn't matter as long as
    # the patterns that have been selected give the same score as
    # the theoretical maximum possible
    if len(best_patterns) == len(correct_result):
        for found, expected in zip(best_patterns, correct_result):
            assert found[1] == expected[1]

    # Check that the right varieties are not resolved
    unresolved = get_unresolved(patterns, best_patterns)
    correct_unresolved = [('Willy', 'Connie_2270')]
    assert unresolved == correct_unresolved
if __name__ == "__main__":
    # Allow the test to be run directly as a script, without pytest:
    # print PASSED on success, otherwise print the traceback and FAILED
    # and exit with a non-zero status.
    import sys
    import traceback

    try:
        test_minimal_markers()
        print("PASSED")
    except Exception:
        traceback.print_exc()
        print("\nFAILED")
        sys.exit(-1)