-
Notifications
You must be signed in to change notification settings - Fork 0
/
DELLY_7.Final_Ann.sh
100 lines (77 loc) · 3.83 KB
/
DELLY_7.Final_Ann.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/bin/bash
## Script to generate single file that contains variants annotated with Annotations sources as listed in AnnotSV with 70% and 10% annotation of CP/Epilepsy/ID genes
##
## Date: 29 May 2019
##
## Example usage: INDIR=/fast/users/a1742674/outputs/Annotated/CNVnator_V sbatch FinalAnn.sh
#SBATCH -A robinson
#SBATCH -p batch
#SBATCH -N 1
#SBATCH -n 1
#SBATCH --time=00:05:00
#SBATCH --mem=4GB
# Notification configuration
#SBATCH --mail-type=END
#SBATCH --mail-type=FAIL
#SBATCH [email protected]
# Define directories.
#   INDIR  - folder that holds the done_70%Ann_200719 and done_10%Ann_200719
#            sub-folders read by the steps below.
#   OUTDIR - folder that receives the temporary and the merged files.
# Both can be supplied through the environment (e.g. "INDIR=/path sbatch ...",
# as shown in the usage line of the header). When they are not set, the
# original project paths are used, so a plain submission behaves as before.
INDIR=${INDIR:-/fast/users/a1742674/outputs/Annotated/050619/dellycalls_3}
OUTDIR=${OUTDIR:-$INDIR/Final_Annotated_200719}

# The script changes directory several times, so relative paths are anchored
# to the submission directory once, up front.
[[ "$INDIR" == /* ]] || INDIR=$PWD/$INDIR
[[ "$OUTDIR" == /* ]] || OUTDIR=$PWD/$OUTDIR

## Check directories ##
if [[ ! -d "$INDIR" ]]; then
  # Diagnostics go to stderr so they are not mistaken for progress output.
  echo "$INDIR not found. Please check you have the right one." >&2
  exit 1
fi

# mkdir -p is a no-op when the folder already exists; it fails (and the job
# stops) when OUTDIR cannot be created, e.g. a regular file is in the way.
if ! mkdir -p -- "$OUTDIR"; then
  echo "could not create $OUTDIR" >&2
  exit 1
fi
## Start of the script
# 1. For every .tsv file in the 70% annotation folder, write a copy to OUTDIR
#    in which the six known-disease-gene columns (52-54 and 61-63) are blanked.
#    Example: 5.V2038.DEL.Discordant.DNV.70%.annotated.tsv
#         ->  $OUTDIR/5.V2038.DEL.Discordant.DNV.70%.temp.tsv
# Stop if the folder is missing: otherwise the loop would pick up whatever
# .tsv files happen to be in the current directory.
cd "$INDIR/done_70%Ann_200719" || {
  echo "cannot enter $INDIR/done_70%Ann_200719" >&2
  exit 1
}

for src in *.tsv; do
  # An unmatched glob stays literal; skipping non-files makes an empty
  # folder a no-op instead of an error.
  [[ -f "$src" ]] || continue

  # Strip the ".annotated.tsv" suffix (names without it are kept whole).
  stem=${src%.annotated.tsv}

  # The fields are emptied rather than removed, so the number of columns and
  # the tab delimiters stay as they were. Input is passed on stdin so that a
  # file name can never be interpreted by awk as an option or assignment.
  awk 'BEGIN{FS=OFS="\t"}{$52=$53=$54=$61=$62=$63=""; print $0}' \
    < "$src" > "$OUTDIR/$stem.temp.tsv" || {
    echo "failed to write $OUTDIR/$stem.temp.tsv" >&2
    exit 1
  }
done
echo "done with excluding columns from 70% annotated files -----> $OUTDIR"
# 2. For every .tsv file in the 10% annotation folder, write to OUTDIR a file
#    that keeps only column 1 plus the six columns 52-54 and 61-63.
#    Example: 5.V2038.DEL.Discordant.DNV.10%.annotated.tsv
#         ->  $OUTDIR/5.V2038.DEL.Discordant.DNV.10%.temp.tsv
# Stop if the folder is missing: otherwise the loop would pick up whatever
# .tsv files happen to be in the current directory.
cd "$INDIR/done_10%Ann_200719" || {
  echo "cannot enter $INDIR/done_10%Ann_200719" >&2
  exit 1
}

for src in *.tsv; do
  # An unmatched glob stays literal; skipping non-files makes an empty
  # folder a no-op instead of an error.
  [[ -f "$src" ]] || continue

  # Strip the ".annotated.tsv" suffix (names without it are kept whole).
  stem=${src%.annotated.tsv}

  # Tab is used for both input and output, so the selected fields are
  # written as a 7-column tab-separated file. Input is passed on stdin so a
  # file name can never be interpreted by awk as an option or assignment.
  awk 'BEGIN{FS=OFS="\t"}{print $1,$52,$53,$54,$61,$62,$63}' \
    < "$src" > "$OUTDIR/$stem.temp.tsv" || {
    echo "failed to write $OUTDIR/$stem.temp.tsv" >&2
    exit 1
  }
done
echo "done with extracting columns of 10% annotated files ----> $OUTDIR"
# 3. Merge the files horizontally: for each <name>.70%.temp.tsv in OUTDIR,
#    paste it side by side with <name>.10%.temp.tsv into
#    <name>.fullAnnotated.tsv, then delete the two temporary files.
cd "$OUTDIR" || {
  echo "cannot enter $OUTDIR" >&2
  exit 1
}

# Kept in a variable so the '%' inside the suffix cannot be confused with the
# '%' operator of the parameter expansion below.
suffix70='.70%.temp.tsv'
failed=0

# A glob already yields each name once, in sorted order.
for file70 in *"$suffix70"; do
  [[ -f "$file70" ]] || continue

  # 5.V2038.DEL.Discordant.DNV.70%.temp.tsv --> 5.V2038.DEL.Discordant.DNV
  name=${file70%"$suffix70"}
  echo "processing $name"

  # define 70% and 10% files
  file10=$name.10%.temp.tsv
  merged=$name.fullAnnotated.tsv

  # Without its 10% partner there is nothing to merge; keep the 70% file so
  # the problem can be inspected instead of silently deleting it.
  if [[ ! -f "$file10" ]]; then
    echo "skipping $name: $file10 not found" >&2
    failed=$((failed + 1))
    continue
  fi

  echo " merging $file70 and $file10 horizontally"
  # paste joins line i of the first file with line i of the second; it does
  # not match rows on the first column.
  # NOTE(review): this assumes both files list the variants in the same
  # order and have the same number of lines - confirm against the inputs.
  if paste -- "$file70" "$file10" > "$merged"; then
    # remove temp files only after the merged file was written
    rm -- "$file70" "$file10"
  else
    echo "paste failed for $name; temp files kept" >&2
    rm -f -- "$merged"
    failed=$((failed + 1))
  fi
done

if (( failed > 0 )); then
  echo "$failed file pair(s) could not be merged; recheck in $OUTDIR" >&2
  exit 1
fi
echo "successfully merged; recheck in $OUTDIR"