-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.sh
executable file
·65 lines (51 loc) · 2.21 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/bin/bash
#
# Convert the BAM file named by $input into de-duplicated, gzip-compressed
# FASTQ file(s) in the current directory.
#
# Environment:
#   input    name of the BAM file to convert (required)
#   COLUMNS  optional; width of the "=" rule written to the results file
#            (80 is used when it is unset)
#
# Files written to the current directory:
#   <input>.results.txt  timestamp, stderr of `bam validate`, and a rule
#   <input>.log          timestamped progress messages
#   *.gz                 the compressed FASTQ files moved out of the scratch
#                        directory ./<input>_output, which is removed on success
#
# External tools: bam, samtools, perl, python, pigz, sort, uniq and the helper
# scripts under /root/scripts.
# NOTE(review): the ownership fix targets /data, so this presumably runs with
# /data as the working directory - confirm against the caller.
set -eu -o pipefail

: "${input:?environment variable 'input' must name the BAM file to convert}"

bamInput=$input
prefix=${bamInput/.bam}           # input name with the first ".bam" removed
outDir=./${bamInput}_output       # scratch directory for intermediate files
logFile=$bamInput.log
resultsFile=$bamInput.results.txt
readonly bamInput prefix outDir logFile resultsFile
readonly dataDir=/data

#######################################
# Append "<timestamp> <message>" to a file.
# Arguments: $1 - file to append to
#            $2 - message text
#######################################
log_event() {
  local dest=$1 message=$2
  local -a stamp
  # Splitting date's output into words and joining them with single spaces
  # keeps the timestamp format produced by the original unquoted `echo \`date\``.
  read -r -a stamp <<< "$(date)"
  printf '%s %s\n' "${stamp[*]}" "$message" >> "$dest"
}

#######################################
# EXIT hook: give the output files the same owner and group as $dataDir.
# Best effort: files that do not exist (e.g. after an early failure) are
# skipped, and a failing chown does not change the script's exit status.
# Globals: dataDir, prefix, logFile, resultsFile (read)
#######################################
finish() {
  local owner path
  owner=$(stat -c '%u:%g' "$dataDir") || return 0
  for path in \
    "$dataDir"/*"$prefix".R[0-9].fq.gz \
    "$dataDir/$logFile" \
    "$dataDir/$resultsFile"; do
    # An unmatched glob stays literal; skip it and any file not created yet.
    [[ -e $path ]] || continue
    # chown prints its own diagnostic; keep going with the remaining files.
    chown "$owner" "$path" || true
  done
}
# Registered before any work starts so ownership is fixed on failure too.
trap finish EXIT

# Scratch directory for the intermediate FASTQ files.
mkdir -p -- "$outDir"

# bam validate
log_event "$resultsFile" "Bam validate"
# A non-zero status from `bam validate` must not stop the conversion, so its
# status is deliberately ignored; its stderr is kept in the results file.
bam validate --in "$bamInput" 2>> "$resultsFile" || true
{
  printf '\n'
  printf '%*s' "${COLUMNS:-80}" '' | tr ' ' '='
  printf '\n'
} >> "$resultsFile"

# samtools: split the BAM into first-mate (R1) and second-mate (R2) FASTQ.
log_event "$logFile" "Samtools fastq conversion"
printf ' \t\t\t\t %s \n' "$bamInput" >> "$logFile"
samtools fastq \
  -1 "$outDir/$prefix.R1.fq" \
  -2 "$outDir/$prefix.R2.fq" \
  "$bamInput"

# dedup: merge each record onto one line, sort on the first space-separated
# field, drop identical lines, then split the records back into lines.
log_event "$logFile" "Removing duplicate read ids - $bamInput"
for mate in 1 2; do
  perl /root/scripts/mergelines.pl < "$outDir/$prefix.R${mate}.fq" \
    | sort -k1,1 -t " " --stable --parallel=10 -T ./ -S 10G \
    | uniq \
    | perl /root/scripts/splitlines.pl > "$outDir/$prefix.R${mate}.fq.perl"
done

# fastqCombinePairedEnd: an R2 file smaller than 10 bytes is treated as
# "no second mate", i.e. single-end data.
readOne=$outDir/$prefix.R1.fq.perl
readTwo=$outDir/$prefix.R2.fq.perl
mateBytes=$(wc -c < "$readTwo")
if (( mateBytes >= 10 )); then
  log_event "$logFile" "Combining paired end reads"
  printf '\t\t\t\t %s: is paired end reads\n' "$bamInput" >> "$logFile"
  python /root/scripts/fastqCombinePairedEnd.py "$readOne" "$readTwo"
else
  printf '\t\t\t\t %s: is single end reads\n' "$bamInput" >> "$logFile"
  # Give the single-end file the "_pairs_R1.fastq" suffix that the
  # compress and rename steps below look for.
  mv -- "$readOne" "${readOne}_pairs_R1.fastq"
fi

# pigz: compress every FASTQ whose name ends in "<digit>.fastq".
for fastq in "$outDir"/*[0-9].fastq; do
  log_event "$logFile" "Compressing file $fastq"
  pigz "$fastq"
done

# rename: strip the intermediate ".perl_pairs_R<n>.fastq" part of each name.
for archive in "$outDir"/*fastq.gz; do
  mv -- "$archive" "${archive/.perl_pairs_R[0-9].fastq.gz}.gz"
done
log_event "$logFile" "$bamInput - Conversion done"

# Move the .gz files to the working directory and drop the scratch directory.
for archive in "$outDir"/*.gz; do
  mv -- "$archive" ./
done
rm -r -- "$outDir"