-
Notifications
You must be signed in to change notification settings - Fork 3
/
sum_contrasts_updownNC2_loop.py
67 lines (63 loc) · 2.12 KB
/
sum_contrasts_updownNC2_loop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
'''Combine multiple contrast files into a single matrix with one row per gene and one column per
contrast file. Each entry is "up" (FC >= 1 and adj_pvalue <= 0.05), "down" (FC <= -1 and
adj_pvalue <= 0.05) or "NC" (no change: every other case).
USAGE: python sum_contrasts_updownNC2_loop.py [directory containing contrast files] [output matrix name]
'''
import os, sys
# Command-line arguments: argv[1] is the directory that is scanned for
# contrast files, argv[2] is the path of the summary matrix to create.
start_dir = sys.argv[1]
# The output file is opened (and truncated) up front; the rest of the script
# writes the matrix to this handle and closes it at the end.
sum_matrix = open(sys.argv[2], mode="w")
# Placeholder for a missing value (the way R, for example, writes missing
# numbers). float() cannot parse it, so it is checked for explicitly.
_MISSING_VALUE = "NA"


def _classify_regulation(fc_text, adj_p_text, fc_threshold, p_threshold):
    """Return "up", "down" or "NC" for one row's fold change and adjusted p-value."""
    adj_p_text = adj_p_text.strip()
    if adj_p_text == _MISSING_VALUE:
        return "NC"
    # Written as `not (p <= cutoff)` rather than `p > cutoff` so that a NaN
    # p-value (every comparison with NaN is False) is also reported as "NC".
    if not float(adj_p_text) <= p_threshold:
        return "NC"
    # The fold change is only parsed for significant rows, so a malformed
    # fold change on a non-significant row never raises.
    fc_text = fc_text.strip()
    if fc_text == _MISSING_VALUE:
        return "NC"
    fold_change = float(fc_text)
    if fold_change >= fc_threshold:
        return "up"
    if fold_change <= -fc_threshold:
        return "down"
    return "NC"


def add_data_to_dict(inp, D, fc_threshold=1.0, p_threshold=0.05):
    """Append one regulation call per data row of a contrast file to D.

    The first line of ``inp`` is treated as a header and skipped. Every other
    line is split on tabs; column 0 is the gene name, column 1 the fold change
    and column 5 the adjusted p-value. Lines that yield a single field (for
    example blank lines) are ignored.

    A row is called "up" when its adjusted p-value is <= ``p_threshold`` and
    its fold change is >= ``fc_threshold``, "down" when the adjusted p-value
    is <= ``p_threshold`` and the fold change is <= ``-fc_threshold``, and
    "NC" otherwise. A fold change or adjusted p-value of "NA" also gives "NC".

    Args:
        inp: iterable file-like object with a ``readline`` method, positioned
            at the start of a tab-separated contrast table.
        D: dict mapping gene name -> list of calls. It is modified in place:
            each processed row appends one call to the list for its gene,
            creating the list on first sight of the gene.
        fc_threshold: absolute fold-change cutoff (default 1.0).
        p_threshold: adjusted p-value cutoff (default 0.05).

    Returns:
        None. Results are accumulated in ``D``.

    Raises:
        IndexError: if a data row has more than one but fewer than six fields.
        ValueError: if a value that has to be parsed is neither a number
            nor "NA".
    """
    inp.readline()  # discard the header row
    for line in inp:
        fields = line.strip().split("\t")
        if len(fields) <= 1:
            continue
        gene = fields[0]
        # Classify before touching D so a malformed row leaves D unchanged.
        call = _classify_regulation(fields[1], fields[5], fc_threshold, p_threshold)
        D.setdefault(gene, []).append(call)
# gene name -> list of "up"/"down"/"NC" calls, one entry per contrast file,
# in the same order as title_list.
D = {}
# Column titles of the output matrix, one per processed contrast file.
title_list = []

# os.listdir() returns names in arbitrary order; sorting makes the column
# order of the output matrix reproducible between runs and machines.
for filename in sorted(os.listdir(start_dir)):
    if not filename.startswith("contrast_"):
        continue
    # The column title is the text between the first and second underscore
    # of the file name (the whole remainder if there is no second underscore).
    name = filename.strip().split("_")
    print(name)
    title_list.append(name[1])

    genes_before = set(D)
    with open(os.path.join(start_dir, filename)) as inp:
        add_data_to_dict(inp, D)

    # Keep every row as long as the header. A gene that is absent from some
    # contrast files would otherwise get a shorter row, and its calls would
    # end up under the wrong column titles.
    n_columns = len(title_list)
    for gene, calls in D.items():
        missing = n_columns - len(calls)
        if missing <= 0:
            continue
        padding = ["NA"] * missing
        if gene in genes_before:
            # Known gene that did not occur in this file.
            calls.extend(padding)
        else:
            # Gene first seen in this file: it was absent from all earlier ones.
            calls[:0] = padding

title_str = "\t".join(title_list)
print(title_str)
print(D)

# `with` closes the output file even if a write fails.
with sum_matrix:
    # Header row: "gene" followed by one title per contrast file. There is no
    # space after the tab, so the first title is not written as " <title>".
    sum_matrix.write("gene\t%s\n" % title_str)
    # One row per gene: the gene name followed by its call for each contrast.
    for gene in D:
        sum_matrix.write("%s\t%s\n" % (gene, "\t".join(D[gene])))