-
Notifications
You must be signed in to change notification settings - Fork 5
/
SciClone_input_prep
157 lines (89 loc) · 6.34 KB
/
SciClone_input_prep
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
### Build the SciClone VAF input table (used to study tumor clonality).
## S_313_tumor

# make_sciclone_vaf_input SAMPLE LABEL
#
# Reads   SAMPLE__soma_snvs.fpfilter_tab_clean.vcf from the current directory.
# Writes  SciClone_input/SAMPLE_varscan_vaf.dat: a tab-separated table whose
#         header is CHROM, POS, LABEL_REF_READS, LABEL_VAR_READS, LABEL_VAF.
# Moves   every *.dat file left in the current directory (the intermediates
#         created here, plus any other *.dat already present) into int_files/.
# Returns non-zero if the destination directories cannot be created,
#         otherwise the status of the final mv.
make_sciclone_vaf_input() {
  # Without these directories the first mv below would rename the table to a
  # plain file called "SciClone_input" instead of filing it away.
  mkdir -p SciClone_input int_files || return 1

  # Keep fields 1, 2 and 11 of every line (whitespace-separated).
  # NOTE(review): field 11 is presumably the tumor sample column - confirm.
  awk -v OFS='\t' '{print $1, $2, $11}' "${1}__soma_snvs.fpfilter_tab_clean.vcf" > "${1}_varscan_vafs.dat"

  # Discard every line containing '#' (the VCF header lines).
  grep -v "#" "${1}_varscan_vafs.dat" > "${1}_varscan_vafs_int.dat"

  # Break the colon-packed third field into separate tab-delimited fields.
  tr ':' '\t' < "${1}_varscan_vafs_int.dat" > "${1}_varscan_vafs_int2.dat"

  # Fields 6-8 after the split are the ones the header below labels
  # REF_READS, VAR_READS and VAF.
  awk -v OFS='\t' '{print $1, $2, $6, $7, $8}' "${1}_varscan_vafs_int2.dat" > "${1}_varscan_vafs_int3.dat"

  # Strip a leading "chr" and a trailing "%". Two plain expressions are used
  # because the \| alternation is a GNU extension to basic regular expressions.
  sed -e 's/^chr//' -e 's/%$//' "${1}_varscan_vafs_int3.dat" > "${1}_varscan_vafs_final.dat"

  # printf is used for the header because the $'...' quoting form is not
  # understood by every /bin/sh.
  printf 'CHROM\tPOS\t%s_REF_READS\t%s_VAR_READS\t%s_VAF\n' "$2" "$2" "$2" |
    cat - "${1}_varscan_vafs_final.dat" > "${1}_varscan_vaf.dat"

  mv "${1}_varscan_vaf.dat" SciClone_input
  mv ./*.dat int_files
}

make_sciclone_vaf_input S_313_T TUM1
#!/bin/sh
#
# Scheduler directives follow; they are kept byte-for-byte. They look like
# Grid Engine qsub options (job name, run in the submit directory, stderr and
# stdout files, shell, mail address and mail events, memory request).
# NOTE(review): confirm their meaning against the cluster's qsub(1).
# NOTE(review): the -M address below looks mangled ("[email protected]");
# restore the real address before submitting.
#$ -N sciclone_input
#$ -cwd
#$ -e err_sciclone_input.txt
#$ -o out_sciclone_input.txt
#$ -S /bin/sh
#$ -M [email protected]
#$ -m bea
#$ -l h_vmem=25G

# All later commands use file names relative to this directory, so stop here
# if it cannot be entered rather than reading and writing files elsewhere.
cd /path_to/vdas/client/exome_seq/results/varscan_out_17112014/Somatic_hc/fpfilter/fpflt_vcf_out/ || exit 1
## VarScan VAF extraction for S_313_ips1, S_313_ips2, S_333_tumor,
## S_333_ips1 and S_333_ips2.
##
## No header is added and nothing is moved in this step: the *_final.dat
## tables written here are post-processed later in the script (leading lines
## removed, then a header prepended).

# extract_varscan_vafs SAMPLE
#
# Reads   SAMPLE__soma_snvs.fpfilter_tab_clean.vcf from the current directory.
# Writes  SAMPLE_varscan_vafs.dat, _int.dat, _int2.dat, _int3.dat and
#         SAMPLE_varscan_vafs_final.dat (five tab-separated fields per line).
# Returns 1, after a message on stderr and without creating any file, when
#         the input cannot be read; otherwise the status of the last sed.
extract_varscan_vafs() {
  if [ ! -r "${1}__soma_snvs.fpfilter_tab_clean.vcf" ]; then
    printf 'extract_varscan_vafs: cannot read %s\n' \
      "${1}__soma_snvs.fpfilter_tab_clean.vcf" >&2
    return 1
  fi

  # Keep fields 1, 2 and 11 of every line (whitespace-separated).
  # NOTE(review): field 11 is presumably the tumor sample column - confirm.
  awk -v OFS='\t' '{print $1, $2, $11}' "${1}__soma_snvs.fpfilter_tab_clean.vcf" > "${1}_varscan_vafs.dat"

  # Discard every line containing '#' (the VCF header lines).
  grep -v "#" "${1}_varscan_vafs.dat" > "${1}_varscan_vafs_int.dat"

  # Break the colon-packed third field into separate tab-delimited fields.
  tr ':' '\t' < "${1}_varscan_vafs_int.dat" > "${1}_varscan_vafs_int2.dat"

  # Fields 6-8 after the split are the ones the headers added later in the
  # script label REF_READS, VAR_READS and VAF.
  awk -v OFS='\t' '{print $1, $2, $6, $7, $8}' "${1}_varscan_vafs_int2.dat" > "${1}_varscan_vafs_int3.dat"

  # Strip a leading "chr" and a trailing "%". Two plain expressions are used
  # because the \| alternation is a GNU extension to basic regular expressions.
  sed -e 's/^chr//' -e 's/%$//' "${1}_varscan_vafs_int3.dat" > "${1}_varscan_vafs_final.dat"
}

# A sample whose input is missing is reported and skipped; the remaining
# samples are still processed.
for sample_id in S_313_i1 S_313_i2 S_333_T S_333_i1 S_333_i2; do
  extract_varscan_vafs "$sample_id"
done
echo "done"
### Drop the first three lines of each *_final.dat table, writing the
### remainder to the matching *_final2.dat file.
# Scratch form tried on a single sample first:
#sed -e '1,3d' S_313_i1_varscan_vafs_final.dat > test.dat
for vaf_prefix in S_313_i1 S_313_i2 S_333_T S_333_i1 S_333_i2; do
  sed -e '1,3d' "${vaf_prefix}_varscan_vafs_final.dat" \
    > "${vaf_prefix}_varscan_vafs_final2.dat"
done
### Prepend the SciClone column header to each trimmed VAF table.

# prepend_vaf_header SAMPLE LABEL
#
# Reads   SAMPLE_varscan_vafs_final2.dat from the current directory.
# Writes  SAMPLE_varscan_vaf.dat: one tab-separated header line
#         (CHROM, POS, LABEL_REF_READS, LABEL_VAR_READS, LABEL_VAF)
#         followed by the input, unchanged.
# Returns the exit status of cat (non-zero when the input is missing; the
#         output then holds the header only).
#
# printf is used because the $'...' quoting form is not understood by every
# /bin/sh. LABEL is given once so the three column names cannot disagree.
prepend_vaf_header() {
  printf 'CHROM\tPOS\t%s_REF_READS\t%s_VAR_READS\t%s_VAF\n' "$2" "$2" "$2" |
    cat - "${1}_varscan_vafs_final2.dat" > "${1}_varscan_vaf.dat"
}

prepend_vaf_header S_313_i1 HIPS1
prepend_vaf_header S_313_i2 HIPS2
# This header previously read LTUM2_REF_READS, LTUM2_VAR_READS, LTUM1_VAF;
# the odd third name is treated as a typo and written as LTUM2_VAF.
# NOTE(review): confirm LTUM2 is the intended label for S_333_T.
prepend_vaf_header S_333_T LTUM2
prepend_vaf_header S_333_i1 LIPS1
prepend_vaf_header S_333_i2 LIPS2
#### Three-column BED files for SciClone, made from the *_LOH_snvs VCFs.
#### (The original note called these "indel files"; the file names say LOH snvs.)
## The VCF is converted to BED first, using vcf2bed of bedops.
## Make the directory holding the tools visible for the rest of the script.
export PATH="$PATH:/scratch/GT/softwares/bin"

# loh_vcf_to_bed3 SAMPLE
#
# Reads   SAMPLE_LOH_snvs.fpfilter_tab_clean.vcf from the current directory.
# Writes  SAMPLE_LOH_snvs.fpfilter_tab_clean.bed            (vcf2bed output)
#         SAMPLE_LOH_snvs.fpfilter_tab_clean_3col.bed       (first 3 fields)
#         SAMPLE_LOH_snvs.fpfilter_tab_clean_3col_final.bed (leading "chr"
#         and trailing "%" removed)
# Returns the exit status of the final sed.
#
# If the VCF is missing, the input redirection fails before the output
# redirection is attempted, so an already existing .bed is left as it is and
# the later steps still use it.
loh_vcf_to_bed3() {
  vcf2bed < "${1}_LOH_snvs.fpfilter_tab_clean.vcf" > "${1}_LOH_snvs.fpfilter_tab_clean.bed"
  awk -v OFS='\t' '{print $1, $2, $3}' "${1}_LOH_snvs.fpfilter_tab_clean.bed" > "${1}_LOH_snvs.fpfilter_tab_clean_3col.bed"
  # Two plain expressions instead of the GNU-only \| alternation.
  sed -e 's/^chr//' -e 's/%$//' "${1}_LOH_snvs.fpfilter_tab_clean_3col.bed" > "${1}_LOH_snvs.fpfilter_tab_clean_3col_final.bed"
}

# The steps used to be split across samples: vcf2bed ran on S_333_T while the
# column selection and clean-up read the S_313_T bed. Running the whole chain
# for both samples still produces every file made before and keeps each
# sample's conversion and trimming together.
# NOTE(review): drop a sample here if only one of them was intended.
for loh_sample in S_313_T S_333_T; do
  loh_vcf_to_bed3 "$loh_sample"
done
#### Mutect vs VarScan2: print how many lines bedtools reports when the
#### Mutect SNV VCF (-a) is intersected with the VarScan2 SNV VCF (-b).
## See the bedtools intersect documentation for the -u and -f 1.0 options.
bedtools intersect \
  -a /path_to/vdas/client/exome_seq/results/mutect/exonic_call/fpfilter/original_vcf_flt/S_313_T_mutect_snvs.fpfilter_tab_clean.vcf \
  -b /path_to/vdas/client/exome_seq/results/varscan_out_17112014/Somatic_hc/fpfilter/fpflt_vcf_out/S_313_T__soma_snvs.fpfilter_tab_clean.vcf \
  -u -f 1.0 |
  wc -l