-
Notifications
You must be signed in to change notification settings - Fork 0
/
cnvkit_results.py
64 lines (43 loc) · 1.95 KB
/
cnvkit_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Author: Julie BOGOIN
import os
import pandas
# Name of the CSV report written in the current working directory.
RESULTS_FILE = 'cnvkit_results.csv'

# log2 copy-ratio cut-offs: rows strictly above the first are labelled
# "duplication", rows strictly below the second are labelled "deletion".
DUPLICATION_LOG2_THRESHOLD = 0.485
DELETION_LOG2_THRESHOLD = -0.69

# Columns kept in the report, in output order.
OUTPUT_COLUMNS = ['sample', 'sex', 'contig', 'start', 'end', 'cnv_ratio',
                  'log2copy_ratio', 'CN', 'effect', 'targets_number']


def _read_call_tables(root):
    """Load every '*.new.call.cns*' table found one directory below *root*.

    Each table gets a 'sample' column holding the part of the file name that
    precedes the first dot. Returns a list of DataFrames (possibly empty).
    """
    tables = []
    # Sorted so that the row order of the report does not depend on the
    # arbitrary order returned by the operating system.
    for folder in sorted(os.listdir(root)):
        folder_path = os.path.join(root, folder)
        # Plain files sitting next to the sample folders must be skipped:
        # calling os.listdir() on them raises NotADirectoryError.
        if not os.path.isdir(folder_path):
            continue
        for name in sorted(os.listdir(folder_path)):
            if ".new.call.cns" not in name:
                continue
            df = pandas.read_csv(os.path.join(folder_path, name), sep='\t',
                                 index_col=None, header=[0])
            # dropna() returns a new frame unless asked to work in place;
            # the result used to be discarded, so empty rows were kept.
            df.dropna(how='all', inplace=True)
            df['sample'] = name.split('.')[0]
            tables.append(df)
    return tables


def _select_cnvs(frame):
    """Rename the columns, keep only duplications/deletions and annotate them.

    *frame* is the merged table (call columns plus the columns of the samples
    file). Returns a new DataFrame restricted to OUTPUT_COLUMNS.
    """
    frame = frame.rename(columns={'log2': 'log2copy_ratio',
                                  'chromosome': 'contig',
                                  'cn': 'CN',
                                  'probes': 'targets_number'})

    log2_ratio = frame['log2copy_ratio']
    is_duplication = log2_ratio > DUPLICATION_LOG2_THRESHOLD
    is_deletion = log2_ratio < DELETION_LOG2_THRESHOLD
    # Rows with a missing log2 value compare False on both sides and are
    # therefore dropped here as well.
    frame = frame[is_duplication | is_deletion].copy()

    # Linear copy ratio recovered from its base-2 logarithm. The previous
    # expression squared the log2 value (log2 ** 2), which gives the same
    # result for a loss and a gain of equal magnitude.
    # NOTE(review): output values of 'cnv_ratio' change with this fix.
    frame['cnv_ratio'] = 2 ** frame['log2copy_ratio']

    # After the filter every row is either a duplication or a deletion.
    frame['effect'] = "deletion"
    frame.loc[frame['log2copy_ratio'] > DUPLICATION_LOG2_THRESHOLD,
              'effect'] = "duplication"

    # Selecting the output columns also discards every other column
    # (gene, depth, weight, ...), so no explicit deletion is needed.
    return frame[OUTPUT_COLUMNS]


def main():
    """Gather the cnvkit calls below the working directory into one CSV.

    Reads '<folder>/*.new.call.cns*' files from the current directory and
    '../samples.txt' (tab-separated, joined on its 'sample' column), then
    writes the filtered calls to 'cnvkit_results.csv'.

    Raises SystemExit when no call file is found.
    """
    print("\n************************************")
    print("cnvkit CNV results program openning.")
    print("************************************\n")

    if os.path.isfile(RESULTS_FILE):
        os.remove(RESULTS_FILE)
        print('Previous results file removed.')

    root = os.getcwd()
    tables = _read_call_tables(root)
    if not tables:
        # pandas.concat() on an empty list fails with an unhelpful
        # "No objects to concatenate"; stop with an explicit message instead.
        raise SystemExit(
            "No '.new.call.cns' file found in the sub-folders of "
            "{0}.".format(root))

    concat = pandas.concat(tables, axis=0, ignore_index=True)

    df_sex = pandas.read_csv('../samples.txt', header=[0], sep="\t",
                             index_col=None)
    frame = pandas.merge(concat, df_sex, left_on='sample', right_on='sample')

    # NOTE(review): the reason for subtracting 12 from the merged row count
    # is not visible in this file; kept as-is, confirm with the author.
    total = frame.shape[0] - 12

    frame = _select_cnvs(frame)

    print('{0} CNV lines filtred among {1} lines found by cnvkit.'.format(
        frame.shape[0], total))

    frame.to_csv(RESULTS_FILE, index=False)

    print("cnvkit_results.csv generated.\n")
    print("cnvkit CNV results job done!\n")


if __name__ == "__main__":
    main()