forked from stanleyouth/-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathblast2consensus.sh
102 lines (73 loc) · 3.25 KB
/
blast2consensus.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
##-------------------------------------------------------------------------------------------------------
##################### required fields |
##################### fill this part properly |
## |
## run settings |
genome=genome.fa # needs correction
reads=reliable.fa # whatever reliable sequences
tmpdir=/path/to/your/workdir ## dir where you want to output logs, intermediate and final results
T='20' ## threads, ie. the number of files the genome is going to be divided into.
extend_len='5'
## |
## environment config |
blastn=/path/to/bin/blastn ##
makedb=/path/to/binmakeblastdb ##
bin=/path/to/the/perl-scripts/ ## dir where involved perl scripts are stored. |
##-------------------------------------------------------------------------------------------------------
#################### keep this part intact
###################
cd $tmpdir;
idx=$reads.idx
step=$1;
##--------------------------------STEP 1-------------------------------------------------------------------------
ln -s $genome ./
if [ $step == 1 ]; then
a=`grep ">" $genome | wc -l`
((s=$a/$T+1))
if [ -f $tmpdir/blast.commands.all ]; then
rm $tmpdir/blast.commands.all
fi
if [ -f $tmpdir/shell.list.all ]; then
rm $tmpdir/shell.list.all
fi
#perl $bin/split.pl $s $genome $tmpdir # spliting genome
cat $tmpdir/fa.list | while read line ;do
## generating all blast commands
cat >> $tmpdir/blast.commands.all << EOF
$blastn -num_threads 2 -outfmt 4 -db $idx -query $line -out $line.blast.m4
perl $bin/find_indel.pl -m4 $line.blast.m4 -extend_len $extend_len $line.cns
EOF
## generating fofn of blast shell scripts
cat >> $tmpdir/shell.list.all << EOF
$line.qsub.sh
EOF
## generating individual blast work shell
cat > $line.qsub.sh << EOF
$blastn -num_threads 2 -outfmt 4 -db $idx -query $line -out $line.blast.m4
perl $bin/find_indel.pl -m4 $line.blast.m4 -extend_len $extend_len $line.cns
EOF
done
fi
##---------------------------------------------------------------------------------------------------------
##---------------------------------STEP 2------------------------------------------------------------------------
if [ $step == 2 ]; then
$makedb -in $reads -out $idx -dbtype nucl
# or cat this command into a shell and qsub.
fi
##---------------------------------------------------------------------------------------------------------
##---------------------------------STEP 3------------------------------------------------------------------------
##----------------------------------- massive computing --------------------------------------------------
if [ $step == 3 ]; then
cat $tmpdir/shell.list.all | while read line ; do
qsub $line
sleep 1
done
fi
##---------------------------------------------------------------------------------------------------------
##---------------------------------STEP 4------------------------------------------------------------------------
##-------------------------------------- final step, cat results together---------------------------------------
########### cat results
if [ $step == 4 ]; then
cat $tmpdir/*cut/*.cns > $tmpdir/final.results.cns.fa
fi
##---------------------------------------------------------------------------------------------------------