-
Notifications
You must be signed in to change notification settings - Fork 0
/
excavator2_results.py
71 lines (48 loc) · 2.34 KB
/
excavator2_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Author: Julie BOGOIN
import os
import pandas
# Collect the EXCAVATOR2 "FastCallResults_" tables found under
# <folder>/w10K_results.<sample>/Results/<sample>/ in the current working
# directory, annotate them with the per-sample columns of ../samples.txt,
# keep only the segments whose log2 copy ratio passes the duplication or
# deletion cut-off, and write the result to excavator2_results.csv.

print("\n************************************")
print("EXCAVATOR2 CNV results program openning.")
print("************************************\n")

# Report file, written to (and cleaned from) the current working directory.
output_csv = 'excavator2_results.csv'

# log2 copy-ratio cut-offs: a segment strictly above dup_threshold is
# reported as a duplication, one strictly below del_threshold as a deletion.
dup_threshold = 0.485
del_threshold = -0.69

if os.path.isfile(output_csv):
    os.remove(output_csv)
    print('Previous results file removed.')

li = []
path = '.'

for folder in os.listdir(path):
    folder_path = os.path.join(path, folder)
    # Regular files sitting next to the run folders cannot be listed;
    # skip them instead of failing with NotADirectoryError.
    if not os.path.isdir(folder_path):
        continue

    for subfolder in os.listdir(folder_path):
        if "w10K_results." not in subfolder:
            continue

        # The sample name is the second dot-separated field of the folder name.
        sample_name = subfolder.split('.')[1]
        result_path = os.path.join(folder_path, subfolder, 'Results', sample_name)

        for name in os.listdir(result_path):
            if "FastCallResults_" not in name:
                continue

            txt_file = os.path.join(result_path, name)
            df = pandas.read_csv(txt_file, sep='\t', index_col=None, header=[0])
            # dropna() returns a new frame, so the result has to be kept.
            # It runs before 'sample' is added so that fully empty rows are
            # still recognised as all-NaN.
            df = df.dropna(how='all')
            df['sample'] = sample_name
            li.append(df)

# pandas.concat() refuses an empty list; stop with an explicit message instead.
if not li:
    raise SystemExit('No FastCallResults_ file found in any w10K_results.* folder.')

concat = pandas.concat(li, axis=0, ignore_index=True)

# samples.txt is a tab-separated table sharing a 'sample' column with the
# results; the report below also expects it to provide a 'sex' column.
df_sex = pandas.read_csv('../samples.txt', header=[0], sep="\t", index_col=None)

# Default (inner) merge: rows whose sample is absent from samples.txt are dropped.
frame = pandas.merge(concat, df_sex, on='sample')

# NOTE(review): the reason for subtracting 12 from the merged row count is
# not visible in this script - confirm it is intended before relying on the
# "lines found" figure printed below.
total = frame.shape[0] - 12

frame = frame.rename(columns={
    'Chromosome': 'contig',
    'Start': 'start',
    'End': 'end',
    'Segment': 'log2copy_ratio',
    'CNF': 'cnv_ratio',
})

# Keep only the segments beyond one of the two cut-offs.
is_cnv = (frame['log2copy_ratio'] > dup_threshold) | (frame['log2copy_ratio'] < del_threshold)
frame = frame[is_cnv].copy()

# 'i' is only a placeholder: every remaining row matches one of the two
# assignments below.
frame['effect'] = 'i'
frame.loc[frame['log2copy_ratio'] > dup_threshold, 'effect'] = "duplication"
frame.loc[frame['log2copy_ratio'] < del_threshold, 'effect'] = "deletion"

# Selecting the report columns also discards every other column of the
# merged table.
cols = ['sample', 'sex', 'contig', 'start', 'end', 'cnv_ratio', 'log2copy_ratio', 'CN', 'effect']
frame = frame[cols]

print('{0} CNV lines filtred among {1} lines found by excavator2.'.format(frame.shape[0], total))

frame.to_csv(output_csv, index=False)

print("excavator2_results.csv generated.\n")
print("EXCAVATOR2 CNV results job done!\n")