-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtests_intersect_maf.py
133 lines (109 loc) · 7.33 KB
/
tests_intersect_maf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import unittest
import subprocess
import filecmp
import os
import shutil
import re
# Locations of the test fixtures and of the scratch output directory.
# Every path keeps its trailing slash because the tests build file paths by
# plain string concatenation.
_tests_root = "tests/"
test_tsv_dir = _tests_root + "tsv/"
test_fa_dir = _tests_root + "fa/"
test_maf_dir = _tests_root + "maf/"
test_bed_dir = _tests_root + "bed/"
test_results_dir = _tests_root + "results/"
class TestIntersectMafBed(unittest.TestCase):
    """End-to-end tests that run the ``./intersect_maf_bed`` executable.

    Success cases run the tool on a maf/bed fixture pair, write the result
    into ``test_results_dir`` and compare it with a target file stored in
    ``test_maf_dir``.  Failure cases check for a non-zero exit status and an
    expected message on stderr.
    """

    # Path of the executable under test, relative to the working directory.
    _tool = "./intersect_maf_bed"

    @classmethod
    def setUpClass(cls):
        """Make sure the output directory exists before any test writes to it."""
        # The ``__main__`` guard of this file creates it as well, but that
        # guard does not run under ``python -m unittest`` or pytest.
        os.makedirs(test_results_dir, exist_ok=True)

    def _command(self, maf, bed, ref, out_path, extra_args=()):
        """Build the argument list for one invocation of the tool.

        ``maf`` and ``bed`` are file names relative to ``test_maf_dir`` and
        ``test_bed_dir``; ``extra_args`` are inserted just before ``-o``.
        """
        return (
            [self._tool, "-m", test_maf_dir + maf, "-b", test_bed_dir + bed, "-r", ref]
            + list(extra_args)
            + ["-o", out_path]
        )

    def _assert_output_matches(self, maf, bed, test_out, target, extra_args=()):
        """Run the tool with reference ``A`` and compare its output to ``target``."""
        out_path = test_results_dir + test_out
        expected = test_maf_dir + target
        subprocess.run(
            self._command(maf, bed, "A", out_path, extra_args),
            stderr=subprocess.DEVNULL,
        )
        # shallow=False forces a byte-by-byte comparison instead of accepting
        # files whose os.stat() signatures merely happen to be identical.
        self.assertTrue(
            filecmp.cmp(out_path, expected, shallow=False),
            msg=out_path + " differs from " + expected,
        )

    def _assert_fails_with(self, maf, bed, ref, expected_error):
        """Run the tool and require a non-zero exit plus ``expected_error`` on stderr."""
        process = subprocess.run(
            self._command(maf, bed, ref, "/dev/stdout"),
            stderr=subprocess.PIPE,
        )
        # Check if the process failed (non-zero exit code)
        self.assertNotEqual(process.returncode, 0)
        # Check if the error message contains the expected error string
        self.assertIn(expected_error, process.stderr.decode("utf-8"))

    # basic use case
    def test_plain_text_input(self):
        self._assert_output_matches(
            "test3.maf", "A.bed", "check_bed_plain.maf", "target_basic.maf"
        )

    # basic use case (gz input)
    def test_gzip_input(self):
        self._assert_output_matches(
            "test3.maf.gz", "A.bed.gz", "check_bed_gz.maf", "target_basic.maf"
        )

    # do not report length=0 maf blocks (leading edge)
    def test_empty(self):
        self._assert_output_matches(
            "test1.maf", "A_edge.bed", "check_empty.maf", "target_empty.maf"
        )

    # do not report length=0 maf blocks (trailing edge)
    def test_empty_2(self):
        self._assert_output_matches(
            "test1.maf", "A_edge_trailling.bed", "check_empty2.maf", "target_empty.maf"
        )

    # test that minimum overlap filter works (1)
    def test_min_overlap(self):
        self._assert_output_matches(
            "test3.maf",
            "multiple_hits.bed",
            "check_min_overlap1.maf",
            "target_min_length_4.maf",
            extra_args=["-l", "4"],
        )

    # test that minimum overlap filter works (2)
    # Previously also named ``test_min_overlap``, which silently replaced the
    # first variant so that it never ran.
    def test_min_overlap_2(self):
        self._assert_output_matches(
            "test3.maf",
            "multiple_hits.bed",
            "check_min_overlap2.maf",
            "target_empty.maf",
            extra_args=["-l", "16"],
        )

    # include maf blocks without hits in bed
    def test_no_hits(self):
        self._assert_output_matches(
            "test3.maf", "A_reverse.bed", "check_no_hits.maf", "target_no_hit.maf"
        )

    # strand awareness (flip bed coordinates for "-" sequences in maf)
    # An identical second definition of this test was dropped.
    def test_reverse(self):
        self._assert_output_matches(
            "test4.maf", "A_reverse.bed", "check_reverse.maf", "target_reverse.maf"
        )

    # merge overlapping bed regions
    def test_merge_overlaps(self):
        self._assert_output_matches(
            "test3.maf", "overlaps.bed", "check_merge.maf", "target_basic.maf"
        )

    # bed regions spanning multiple maf sequences (going by the fixture name)
    def test_spanning_maf_regions(self):
        self._assert_output_matches(
            "test3.maf",
            "spanning_multiple_maf_seqs.bed",
            "check_spanning_regions.maf",
            "target_spanning.maf",
        )

    # multiple bed regions in single maf block
    def test_multiple_bed_regions_in_seq(self):
        self._assert_output_matches(
            "test3.maf",
            "multiple_hits.bed",
            "check_multiple_hits.maf",
            "target_multiple_hits.maf",
        )

    # failure upon asking for the wrong sample with `--ref`
    def test_failure_wrong_ref(self):
        self._assert_fails_with(
            "test4.maf", "A_reverse.bed", "B", "maf file does not seem to use"
        )

    # failure with inconsistent reference sample name in maf
    def test_failure_inconsistent_ref(self):
        self._assert_fails_with(
            "test3_bad.maf", "A.bed", "A", "maf file does not seem to use"
        )
# wrong ref
# ./intersect_maf_bed -m tests/maf/test3.maf -b tests/bed/A.bed -r B -o /dev/stdout
if __name__ == "__main__":
    # Scratch directory the tests write their output files into.
    os.makedirs(test_results_dir, exist_ok=True)
    # exit=False keeps control in this script so the scratch directory can be
    # cleaned up after the run.
    result = unittest.main(exit=False)
    succeeded = result.result.wasSuccessful()
    if succeeded:
        # Outputs are removed only after a fully successful run; on failure
        # they stay on disk.
        shutil.rmtree(test_results_dir)
    # exit=False also suppressed unittest's own exit status; restore it so a
    # failing run is visible to the calling process.
    raise SystemExit(0 if succeeded else 1)