-
Notifications
You must be signed in to change notification settings - Fork 1
/
interleave-to-before-asm.sh
87 lines (66 loc) · 1.89 KB
/
interleave-to-before-asm.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/bin/bash --login
#PBS -l walltime=40:00:00
#PBS -l mem=100gb
#PBS -l nodes=1:ppn=8
#PBS -m abe
#PBS -N elk-sub
#PBS -A ged
cd ${PBS_O_WORKDIR}
## TRIMMING - FROM RAW READS ##
# FastQC
source hpcc.modules
mkdir -p output-fq
for filename in *.fastq.gz;
do
fastqc -o output-fq $filename
done
rm -f orphans.fastq.gz
rm -f TruSeq3-PE.fa
# Get Illumina TruSeq3 Adapters
wget https://anonscm.debian.org/cgit/debian-med/trimmomatic.git/plain/adapters/TruSeq3-PE.fa
for filename in *_R1_*.fastq.gz
do
base=$(basename $filename .fastq.gz)
echo $base
baseR2=${base/_R1_/_R2_}
echo $baseR2
java -jar $TRIM/trimmomatic PE -phred33 ${base}.fastq.gz ${baseR2}.fastq.gz \
intermediate/${base}.trim.qc.fq.gz intermediate/s1_se \
intermediate/${baseR2}.trim.qc.fq.gz intermediate/s2_se \
ILLUMINACLIP:TruSeq3-PE.fa:2:40:15 \
LEADING:2 TRAILING:2 \
SLIDINGWINDOW:4:2 \
MINLEN:25
# save the orphans
gzip -9c intermediate/s{1,2}_se >> intermediate/orphans.fq.gz
rm -f intermediate/s{1,2}_se
done
# FastQC again
for filename in intermediate/*.trim.qc.fq.gz;
do
fastqc -o output-fq $filename
done
## INTERLEAVE READS - FROM TRIMMED READS ##
set -o pipefail
set -x
module use /opt/software/ged-software/modulefiles/
module load anaconda
source activate elk
module swap GNU GNU/4.9
set -e
for filename in intermediate/*_R1_*.trim.qc.fq.gz
do
# first, make the base by removing .extract.fastq.gz
base=$(basename $filename .trim.qc.fq.gz)
echo $base
# now, construct the R2 filename by replacing R1 with R2
baseR2=${base/_R1_/_R2_}
echo $baseR2
# construct the output filename
output=${base/_R1_/}.pe.trim.qc.fq.gz
(interleave-reads.py intermediate/${base}.trim.qc.fq.gz intermediate/${baseR2}.trim.qc.fq.gz | \
gzip > intermediate/$output)
done
cat ${PBS_NODEFILE}
env | grep PBS
qstat -f ${PBS_JOBID}