-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrarefaction_step1.sh
91 lines (54 loc) · 2.54 KB
/
rarefaction_step1.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#/bin/sh
file_base=$1
log=${file_base}.pipeline.log
mkdir ${file_base}
cp ${file_base}_* ${file_base}
cd ${file_base}
#gunzip fastq files and convert to fasta before re-zipping?:
seqtk seq -a ${file_base}_R1_001.fastq.gz > ${file_base}.fasta
gzip *.fasta
#define f1:
f1=${file_base}.fasta.gz
#BBduk to find reads containing upstream flanking sequence
bbduk.sh in=$f1 outm=${file_base}_containing_upstream_flank.fa.gz literal=GATGCTGGGGACAAGTCACC k=20 hdist=1 ow=t;
f1=${file_base}_containing_upstream_flank.fa.gz
#BBduk to find reads containing downstream flanking sequence
bbduk.sh in=$f1 outm=${file_base}_containing_barcode.fa.gz literal=TTTTGCCACTATGCCTACAT k=20 hdist=1 ow=t;
f1=${file_base}_containing_barcode.fa.gz
#barcode reference sequence for mapping.
barcode1=~/scripts/WNVic_barcode1.fasta
#map to reference sequence to orient reads.
bbmap.sh in=$f1 outm=${file_base}_mapped.sam ref=$barcode1 ow=t;
#reformat plus and minus strand SAM files to FASTQ
f1=${file_base}_mapped.sam
reformat.sh in=$f1 out=${file_base}_mapped.fastq.gz
#trim reads to only contain barcodes
#remove upstream flanking sequence from oriented reads
f1=${file_base}_mapped.fastq.gz
bbduk.sh in=$f1 out=${file_base}_upstream_flank_removed.fastq.gz literal=GATGCTGGGGACAAGTCACC ktrim=l rcomp=f k=20 hdist=1 ow=t;
#remove downstream flanking sequence from oriented reads
f1=${file_base}_upstream_flank_removed.fastq.gz
bbduk.sh in=$f1 out=${file_base}_barcodes.fastq.gz literal=TTTTGCCACTATGCCTACAT ktrim=r rcomp=f k=20 hdist=1 minlength=33 maxlength=33 ow=t;
f1=${file_base}_barcodes.fastq.gz
#convert fastq to fasta
gunzip $f1
f1=${file_base}_barcodes.fastq
#Convert to fasta.
sed -n '1~4s/^@/>/p;2~4p' $f1 > ${file_base}_barcodes.fasta
#remove headers from fasta file for script input.
f1=${file_base}_barcodes.fasta
grep -v '^>' $f1 > ${file_base}_barcodes.txt
#clean up directory
rm ${file_base}_containing_upstream_flank.fa.gz
rm ${file_base}_containing_barcode.fa.gz
rm ${file_base}_mapped.sam
rm ${file_base}_barcodes.fastq
rm ${file_base}_mapped.fastq.gz
rm ${file_base}_read1.fasta.gz
rm ${file_base}_read2.fasta.gz
rm ${file_base}_upstream_flank_removed.fastq.gz
gzip ${file_base}_barcodes.fasta
#get line counts - only do this if using output w/ abundance info and weight_by option in slice_sample
#perl ~/scripts/line_counts ${file_base}_barcodes.txt > ${file_base}_barcode_counts.txt
#call R script to do bootstrap rarefaction - need to define Args to make this work
#Rscript ~/scripts/bootstrap_rarefaction.R ${file_base}_barcodes.txt > ${file_base}_subsamples.txt