forked from cguccione/human_host_filtration
-
Notifications
You must be signed in to change notification settings - Fork 0
/
split_fastq.sh
49 lines (37 loc) · 1.38 KB
/
split_fastq.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/bin/bash -l
# author: Lucas Patel ([email protected])
# date: 12/22/23
# description: Script to split an interleaved FASTQ file into separate R1/R2 files. Credit: https://biowize.wordpress.com/2015/03/26/the-fastest-darn-fastq-decoupling-procedure-i-ever-done-seen/
#!/bin/bash
# description: Script to split an interleaved FASTQ file into separate R1/R2 files. Credit: https://biowize.wordpress.com/2015/03/26/the-fastest-darn-fastq-decoupling-procedure-i-ever-done-seen/
# --- Argument and configuration validation ---------------------------------
# Exactly two positional arguments are required: the interleaved FASTQ file
# and a config file that is sourced below. All diagnostics go to stderr so
# that stdout carries only the script's regular output.
if [[ "$#" -ne 2 ]]; then
  echo "Usage: $0 <interleaved_fastq> <config_file>" >&2
  exit 1
fi

input_fastq="$1"
config_fn="$2"

if [[ ! -f "$input_fastq" ]]; then
  echo "Error: Input FASTQ file '$input_fastq' not found." >&2
  exit 1
fi

if [[ ! -f "$config_fn" ]]; then
  echo "Error: Config file '$config_fn' not found." >&2
  exit 1
fi

# The config file is executed in the current shell; CONDA_ENV_NAME and OUT
# are read right after it has been sourced.
source "$config_fn"
conda activate "$CONDA_ENV_NAME"

# OUT is the directory the split files are written to; refuse to continue
# without it rather than writing to "/<name>.fastq.gz".
if [[ -z "$OUT" ]]; then
  echo "Error: Output directory 'OUT' not defined in the config file." >&2
  exit 1
fi
#######################################
# Split an interleaved FASTQ file into two gzip-compressed files, one per
# mate.
# Globals:
#   OUT (read) - directory that receives both output files
# Arguments:
#   $1 - path to the interleaved FASTQ file (read as plain text by paste)
# Outputs:
#   Prints the input path and both output base names to stdout; writes
#   "${OUT}/<r1 name>.fastq.gz" and "${OUT}/<r2 name>.fastq.gz".
#   Error messages go to stderr.
# Returns:
#   0 on success; 1 if OUT is empty, the base name cannot be derived, or a
#   stage of the main split pipeline exits non-zero.
#######################################
unweave() {
  local r1="$1"
  local basename_r1 basename_r2 stage_status
  local -a stage_statuses

  if [[ -z "${OUT:-}" ]]; then
    echo "Error: Output directory 'OUT' not defined in the config file." >&2
    return 1
  fi

  # Drop the directory and a trailing ".fastq", then one further extension
  # if present (e.g. "x_R1.interleaved.fastq" -> "x_R1"). Declaration and
  # assignment are separate so a basename failure is not masked by 'local'.
  basename_r1=$(basename -- "$r1" .fastq) || return 1
  basename_r1="${basename_r1%.*}"

  if [[ "$basename_r1" == *_R1* ]]; then
    # Same substitution the script always used: first "_R1" becomes "_R2".
    basename_r2="${basename_r1/_R1/_R2}"
  else
    # Without an "_R1" marker both outputs would get the same name and
    # overwrite each other, so add explicit mate suffixes instead.
    basename_r2="${basename_r1}_R2"
    basename_r1="${basename_r1}_R1"
  fi

  echo "${r1} and ${basename_r1} ${basename_r2}"

  # paste with eight "-" operands joins every 8 input lines into one
  # tab-separated row; columns 1-4 form the first record and 5-8 the second.
  # tee duplicates the rows so the input is read only once: one copy goes to
  # the R1 writer, the other continues down the pipe to the R2 writer.
  paste - - - - - - - - < "$r1" \
    | tee >(cut -f 1-4 | tr '\t' '\n' | gzip > "${OUT}/${basename_r1}.fastq.gz") \
    | cut -f 5-8 | tr '\t' '\n' | gzip > "${OUT}/${basename_r2}.fastq.gz"
  # Must be captured immediately: any other command resets PIPESTATUS.
  stage_statuses=("${PIPESTATUS[@]}")

  for stage_status in "${stage_statuses[@]}"; do
    if (( stage_status != 0 )); then
      echo "Error: Failed to split '${r1}' into R1/R2 files." >&2
      return 1
    fi
  done
  return 0
}
# Entry point: split the input file given as the first argument. As the last
# command, its exit status becomes the exit status of the script.
unweave "$input_fastq"