forked from cguccione/human_host_filtration
-
Notifications
You must be signed in to change notification settings - Fork 0
/
submit_filter.array.sh
27 lines (20 loc) · 1.26 KB
/
submit_filter.array.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/bin/bash -l
# author: Lucas Patel ([email protected])
# date: 12/22/23
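# description: Script to run host filtration as a SLURM job array. Lists the FASTQ files in the input directory named by the config file and submits filter.array.sbatch with one array task per R1 (or non-R1/R2) FASTQ file.
# usage: submit_filter.array.sh <config_file>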
# Usage: submit_filter.array.sh <config_file>
#
# The config file is sourced and must define:
#   IN  - directory holding the input FASTQ files (searched non-recursively)
#   TMP - parent directory under which a per-run scratch directory is created
#
# Writes r1_files.txt, r2_files.txt, other_files.txt and all_files.txt into
# the scratch directory (exported as TMPDIR), then submits filter.array.sbatch
# as a job array with one task per line of all_files.txt.

config_fn=${1:-}
if [[ -z "${config_fn}" ]]; then
  echo "Usage: ${0##*/} <config_file>" >&2
  exit 2
fi
if [[ ! -r "${config_fn}" ]]; then
  echo "Error: cannot read config file: ${config_fn}" >&2
  exit 1
fi

# The exit status of `source` is that of the last command in the config, so it
# is deliberately not checked; the required variables are validated instead.
# shellcheck source=/dev/null
source "${config_fn}"
export config_fn

: "${IN:?config file must set IN (input directory)}"
: "${TMP:?config file must set TMP (scratch parent directory)}"
if [[ ! -d "${IN}" ]]; then
  echo "Error: input directory does not exist: ${IN}" >&2
  exit 1
fi

echo "Beginning host filtration (job array mode) on directory: ${IN}"

# Create the scratch directory directly under TMP. The previous approach ran
# `mktemp -d` in the default temp location just to borrow its name, leaving a
# stray empty directory behind on every run.
mkdir -p -- "${TMP}" || exit 1
TMPDIR=$(mktemp -d "${TMP%/}/tmp.XXXXXXXXXX") || {
  echo "Error: could not create scratch directory under ${TMP}" >&2
  exit 1
}
export TMPDIR
echo "$TMPDIR"

# Print the number of lines in file $1 with any padding whitespace removed.
count_lines() {
  wc -l < "$1" | tr -d '[:space:]'
}

# Redirect find's output straight into each list so the file exists even when
# nothing matches (`-exec ... {} +` does not run its command in that case).
# Lists are sorted because find's output order is unspecified.
find "$IN" -maxdepth 1 -type f \
  \( -name '*_R1*.fastq' -o -name '*_R1*.fastq.gz' \) \
  | LC_ALL=C sort > "$TMPDIR/r1_files.txt"
find "$IN" -maxdepth 1 -type f \
  \( -name '*_R2*.fastq' -o -name '*_R2*.fastq.gz' \) \
  | LC_ALL=C sort > "$TMPDIR/r2_files.txt"
# Exclude R1/R2 by file name only; filtering the full path would drop every
# file whenever the input directory's own path contains _R1 or _R2.
find "$IN" -maxdepth 1 -type f \
  \( -name '*.fastq' -o -name '*.fastq.gz' \) \
  ! -name '*_R[12]*' \
  | LC_ALL=C sort > "$TMPDIR/other_files.txt"

num_r1=$(count_lines "$TMPDIR/r1_files.txt")
num_r2=$(count_lines "$TMPDIR/r2_files.txt")
num_other=$(count_lines "$TMPDIR/other_files.txt")

echo "Found ${num_r1} R1 FASTQ files"
echo "Found ${num_r2} R2 FASTQ files"
if (( num_r1 != num_r2 )); then
  echo "Warning: The number of R1 and R2 FASTQ files is not the same."
fi
echo "Found ${num_other} other files"

# One array task per R1 file plus one per file that is neither R1 nor R2.
cat "$TMPDIR/r1_files.txt" "$TMPDIR/other_files.txt" > "$TMPDIR/all_files.txt"
num_jobs=$(count_lines "$TMPDIR/all_files.txt")
echo "$num_jobs"

# An empty list would otherwise request --array=1-0, which is not a valid range.
if (( num_jobs == 0 )); then
  echo "Error: no FASTQ files found in ${IN}; nothing to submit." >&2
  exit 1
fi

sbatch --array="1-${num_jobs}" filter.array.sbatch