-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathtest_seg.py
119 lines (104 loc) · 3.38 KB
/
test_seg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import pandas as pd
import pytest
from genie_registry.seg import seg
@pytest.fixture
def seg_class(syn):
    """Build the ``seg`` object that the tests in this module exercise.

    Args:
        syn: Value of the ``syn`` fixture, forwarded unchanged as the first
            argument to ``seg``. (Presumably a Synapse client or a stand-in
            for one, supplied by a conftest -- not visible from this file.)

    Returns:
        A ``seg`` instance constructed with ``"SAGE"`` as its second argument.
    """
    center = "SAGE"
    seg_handler = seg(syn, center)
    return seg_handler
def test_processing(seg_class):
    """Check the frame returned by ``_process`` for a small raw seg table.

    The raw input uses dotted headers (``LOC.START``, ``SEG.MEAN``, ...) and a
    ``CHROM`` column mixing ``"chr1"`` with bare integers. The expected output
    uses undotted headers, chromosome labels as plain strings without a
    ``chr`` prefix, and an extra ``CENTER`` column filled with ``"SAGE"``.
    """
    sample_ids = [
        "GENIE-SAGE-ID1-1",
        "GENIE-SAGE-ID2-1",
        "GENIE-SAGE-ID3-1",
        "GENIE-SAGE-ID4-1",
        "GENIE-SAGE-ID5-1",
    ]

    # Raw table as it would be handed to _process.
    raw_columns = {
        "ID": sample_ids,
        "CHROM": ["chr1", 2, 3, 4, 5],
        "LOC.START": [1, 2, 3, 4, 3],
        "LOC.END": [1, 2, 3, 4, 2],
        "NUM.MARK": [1, 2, 3, 4, 3],
        "SEG.MEAN": [1, 2, 3.9, 4, 3],
    }
    raw_seg = pd.DataFrame(raw_columns)

    # Table the processed result is compared against.
    wanted_columns = {
        "ID": sample_ids,
        "CHROM": ["1", "2", "3", "4", "5"],
        "LOCSTART": [1, 2, 3, 4, 3],
        "LOCEND": [1, 2, 3, 4, 2],
        "NUMMARK": [1, 2, 3, 4, 3],
        "SEGMEAN": [1, 2, 3.9, 4, 3],
        "CENTER": ["SAGE"] * 5,
    }
    wanted = pd.DataFrame(wanted_columns)

    processed = seg_class._process(raw_seg)

    # Only the expected columns are compared, in the expected order; any
    # additional columns in the processed frame are ignored.
    comparable = processed[wanted.columns]
    assert wanted.equals(comparable)
def test_validation_filename(seg_class):
    """Check ``validateFilename`` on one rejected and one accepted name.

    ``["foo"]`` is expected to raise ``AssertionError``, while
    ``["genie_data_cna_hg19_SAGE.seg"]`` is expected to return ``"seg"``.
    """
    rejected = ["foo"]
    with pytest.raises(AssertionError):
        seg_class.validateFilename(rejected)

    accepted = ["genie_data_cna_hg19_SAGE.seg"]
    file_type = seg_class.validateFilename(accepted)
    assert file_type == "seg"
def test_validation_perfect(seg_class):
    """Check that ``_validate`` reports nothing for a well-formed seg table.

    Every ID starts with ``GENIE-SAGE``, all required columns are present and
    all values are integers, so both the error and the warning strings are
    expected to be empty.
    """
    clean_columns = {
        "ID": [
            "GENIE-SAGE-ID1",
            "GENIE-SAGE-ID2",
            "GENIE-SAGE-ID3",
            "GENIE-SAGE-ID4",
            "GENIE-SAGE-ID5",
        ],
        "CHROM": [1, 2, 3, 4, 5],
        "LOC.START": [1, 2, 3, 4, 3],
        "LOC.END": [1, 2, 3, 4, 3],
        "NUM.MARK": [1, 2, 3, 4, 3],
        "SEG.MEAN": [1, 2, 3, 4, 3],
    }
    clean_seg = pd.DataFrame(clean_columns)

    outcome = seg_class._validate(clean_seg)
    error, warning = outcome
    assert error == ""
    assert warning == ""
def test_valdation_invalid(seg_class):
    """Check the exact error text ``_validate`` emits for two bad seg tables.

    Scenario 1 omits the ``SEG.MEAN`` column, puts NaN in ``CHROM``,
    ``LOC.START`` and ``LOC.END``, and uses IDs without the ``GENIE-SAGE``
    prefix. Scenario 2 has every column but puts non-integer values in the
    integer columns and a non-numeric string in ``SEG.MEAN``. In both cases
    the warning string is expected to be empty.
    """
    unprefixed_ids = ["ID1", "ID2", "ID3", "ID4", "ID5"]
    nan = float("nan")

    # --- Scenario 1: missing header, null values, bad ID prefix ---
    incomplete_seg = pd.DataFrame(
        {
            "ID": unprefixed_ids,
            "CHROM": [1, 2, nan, 4, 5],
            "LOC.START": [1, 2, 3, 4, nan],
            "LOC.END": [1, 2, 3, nan, 3],
            "NUM.MARK": [1, 2, 3, 4, 3],
        }
    )
    incomplete_message = (
        "Your seg file is missing these headers: SEG.MEAN.\n"
        "Seg: No null or empty values allowed in column(s): "
        "CHROM, LOC.END, LOC.START.\n"
        "Seg: ID must start with GENIE-SAGE\n"
    )
    error, warning = seg_class._validate(incomplete_seg)
    assert error == incomplete_message
    assert warning == ""

    # --- Scenario 2: wrong value types, bad ID prefix ---
    mistyped_seg = pd.DataFrame(
        {
            "ID": unprefixed_ids,
            "CHROM": [1, 2, 3, 4, 5],
            "LOC.START": [1, 2, 3, 4.3, 3],
            "LOC.END": [1, 2, 3.4, 4, 3],
            "NUM.MARK": [1, 2, 3, 33.3, 3],
            "SEG.MEAN": [1, 2, "f.d", 4, 3],
        }
    )
    # The spelling "integars" is deliberate here: it must match the message
    # produced by the validator character for character.
    mistyped_message = (
        "Seg: Only integars allowed in these column(s): "
        "LOC.END, LOC.START, NUM.MARK.\n"
        "Seg: Only numerical values allowed in SEG.MEAN.\n"
        "Seg: ID must start with GENIE-SAGE\n"
    )
    error, warning = seg_class._validate(mistyped_seg)
    assert error == mistyped_message
    assert warning == ""