-
Notifications
You must be signed in to change notification settings - Fork 1
/
genes_table.py
150 lines (139 loc) · 4.89 KB
/
genes_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import os
import sys
from ASB_app import *
from ASB_app.releases import ReleaseBillCipher
import numpy as np
release = ReleaseBillCipher
Gene = release.Gene
SNP = release.SNP
TFSNP = release.TranscriptionFactorSNP
CLSNP = release.CellLineSNP
TF = release.TranscriptionFactor
CL = release.CellLine
ExpSNP = release.ExpSNP
Experiment = release.Experiment
session = release.session
db = release.db
if __name__ == '__main__':
if sys.argv[1] == 'TF':
AGSNP = TFSNP
AG = TF
OTHER = CL
elif sys.argv[1] == 'CL':
AGSNP = CLSNP
AG = CL
OTHER = TF
else:
raise ValueError
q_target = session.query(
Gene,
SNP,
AGSNP,
AG,
OTHER.name
).join(
SNP,
Gene.snps_by_target
).join(
AGSNP,
getattr(SNP, 'tf_aggregated_snps' if AG == TF else 'cl_aggregated_snps')
).filter(
AGSNP.best_p_value > np.log10(20)
).join(
AG,
getattr(AGSNP, 'transcription_factor' if AG == TF else 'cell_line')
).join(
ExpSNP,
AGSNP.exp_snps
).filter(
(ExpSNP.p_value_ref - ExpSNP.p_value_alt) * (
AGSNP.log_p_value_alt - AGSNP.log_p_value_ref) > 0
).join(
Experiment,
ExpSNP.experiment,
).join(
OTHER,
getattr(Experiment, 'transcription_factor' if AG == CL else 'cell_line')
)
q_promoter = session.query(
Gene,
SNP,
AGSNP,
AG,
OTHER.name
).join(
SNP,
Gene.proximal_promoter_snps
).join(
AGSNP,
getattr(SNP, 'tf_aggregated_snps' if AG == TF else 'cl_aggregated_snps')
).filter(
AGSNP.best_p_value > np.log10(20)
).join(
AG,
getattr(AGSNP, 'transcription_factor' if AG == TF else 'cell_line')
).join(
ExpSNP,
AGSNP.exp_snps
).filter(
(ExpSNP.p_value_ref - ExpSNP.p_value_alt) * (
AGSNP.log_p_value_alt - AGSNP.log_p_value_ref) > 0
).join(
Experiment,
ExpSNP.experiment,
).join(
OTHER,
getattr(Experiment, 'transcription_factor' if AG == CL else 'cell_line')
)
promoter_dict = {}
target_dict = {}
for i, (q, q_dict) in enumerate(((q_promoter, promoter_dict), (q_target, target_dict))):
print(i)
for (gene, snp, agsnp, ag, other_name) in q:
if gene.start_pos == 1 and gene.end_pos == 1:
# if GENE_ID is not in gencode v35, but gene is a GTEX eQTL target
continue
if (gene.gene_id, snp.rs_id, snp.alt) in q_dict:
q_dict[(gene.gene_id, snp.rs_id, snp.alt)][-1].add(other_name)
else:
q_dict[(gene.gene_id, snp.rs_id, snp.alt)] = [gene.gene_name, gene.gene_id, snp.chromosome, snp.position,
'rs' + str(snp.rs_id), snp.ref, snp.alt,
gene.start_pos - snp.position if gene.orientation
else snp.position - gene.end_pos, ag.name,
'{} ({})'.format(*(('ref', snp.ref) if agsnp.log_p_value_ref > agsnp.log_p_value_alt else ('alt', snp.alt))),
max(agsnp.log_p_value_ref, agsnp.log_p_value_alt),
agsnp.es_ref if agsnp.log_p_value_ref > agsnp.log_p_value_alt else agsnp.es_alt,
{other_name}]
all_keys = list(set(promoter_dict.keys()) | set(target_dict.keys()))
with open(os.path.expanduser('~/{}_genes_all_{}.tsv'.format('tf' if AG == TF else 'cl', release.name)), 'w') as out:
out.write(
'\t'.join(
map(str, [
'Gene_name',
'Gene_id'
'Chromosome',
'Position',
'rs_ID',
'Ref',
'Alt',
'Distance_to_TSS',
'TF' if AG == TF else 'Cell_type',
'Preferred_allele',
'Log10_FDR',
'Effect_size(log2)',
'Supporting_{}'.format('TFs' if AG == CL else 'Cell_types'),
'eQTL',
'promoter_SNP',
])
) + '\n'
)
for key in all_keys:
if key in target_dict:
data = target_dict[key]
else:
data = promoter_dict[key]
out.write(
'\t'.join(
map(str, data[:-1] + ['|'.join(data[-1])] + [key in target_dict, key in promoter_dict])
) + '\n'
)