-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfastqfetch.sh
executable file
·68 lines (57 loc) · 2.15 KB
/
fastqfetch.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/bash
# Print the usage notes for this script on stdout, then terminate the shell
# with exit status 1. Takes no arguments. Because it exits, nothing placed
# after a call to help is ever executed.
help() {
  printf '%s\n' \
    "This script is going to download the SRA files from an SRA list provided after sorting the ID from an SRA table" \
    "and place them inside of a new folder" \
    "No argument is needed" \
    "" \
    "This script functions if the table file has a header" \
    "If not, just change the done by the one behind a #"
  exit 1
}
# ---------------------------------------------------------------------------
# Environment and configuration
# ---------------------------------------------------------------------------
# Put the bundled sratoolkit binaries first on PATH.
# export PATH="/scratch/user/uqsdemon/ApplicationGNN/gnnenvo/bin/sratoolkit.3.1.1-ubuntu64/bin:$PATH"
export PATH="/scratch/project/tcr_ml/SRR_database_extraction/sratoolkit.3.1.1-ubuntu64/bin:$PATH"
# Load necessary modules (if still required).
# NOTE(review): assumes a `module` command (Environment Modules / Lmod) is
# available in this shell - confirm for the target cluster.
module load sra-tools
module load samtools
# File containing the SRR IDs to download, one per line.
fastq_ids_file="SRRIDS/fastq_ids.txt"
# File listing the IDs to exclude, one per line.
exclude_ids_file="filedone.txt"
# Directory that receives one sub-directory of FASTQ output per SRR ID.
output_scratch="/scratch/project/tcr_ml/SRR_database_extraction/fastqfilesncbi"
# NGC file handed to fasterq-dump via --ngc.
keypath="prj_33410_D38764.ngc"

# ---------------------------------------------------------------------------
# Pre-flight checks: each failure prints a message, then help prints the usage
# text and exits with status 1.
# ---------------------------------------------------------------------------
if ! command -v fasterq-dump &> /dev/null; then
  echo "fasterq-dump could not be found. Please check your sratoolkit installation and PATH."
  help
fi
if [ ! -f "$fastq_ids_file" ]; then
  echo "Error: the file with SRA IDs does not exist."
  help
fi
if [ ! -f "$keypath" ]; then
  echo "Error: the NGC file does not exist at the specified path."
  help
fi

# Load the IDs to exclude into an array. A missing exclusion file simply means
# that nothing is excluded, so it is not treated as a fatal error and mapfile
# is not run against a non-existent path.
# NOTE(review): bash does not pass arrays through the environment, so this
# array is only visible in this shell, not in child shells.
exclude_ids=()
if [ -f "$exclude_ids_file" ]; then
  mapfile -t exclude_ids < "$exclude_ids_file"
else
  echo "Warning: exclusion file '$exclude_ids_file' not found; no IDs will be excluded." >&2
fi

# Scratch space for fasterq-dump. Fall back to /tmp when TMPDIR is unset or
# empty; otherwise `mkdir -p ""` fails and fasterq-dump receives an empty
# --temp argument.
temp_path="${TMPDIR:-/tmp}"

# Ensure the temporary and output directories exist before any download starts.
if ! mkdir -p "$temp_path" "$output_scratch"; then
  echo "Error: could not create '$temp_path' or '$output_scratch'." >&2
  exit 1
fi
#######################################
# Download one SRA run and convert it to FASTQ with fasterq-dump.
# Globals (read; must be exported so worker shells can see them):
#   exclude_ids_file - file of IDs to skip, one per line (optional)
#   output_scratch   - parent directory for the per-ID output directories
#   keypath          - NGC file passed to fasterq-dump --ngc
#   temp_path        - directory passed to fasterq-dump --temp
# Arguments:
#   $1 - SRR ID to fetch
# Outputs:
#   Progress messages on stdout, failure messages on stderr.
# Returns:
#   0 when the ID is empty, excluded, or converted successfully;
#   otherwise the non-zero status of the failing mkdir / fasterq-dump.
#######################################
download_and_convert() {
  local srr_id=$1
  local rc

  # A blank line in the ID list arrives as an empty argument: nothing to do.
  if [[ -z "$srr_id" ]]; then
    return 0
  fi

  # The exclusion list is checked in the file itself rather than in a bash
  # array: arrays cannot be exported, so an array filled in the parent script
  # is empty inside the shells that GNU Parallel starts for each job.
  # -x matches whole lines only, -F treats the ID as a literal string.
  if [[ -f "${exclude_ids_file:-}" ]] \
    && grep -qxF -- "$srr_id" "$exclude_ids_file"; then
    echo "Skipping excluded ID: $srr_id"
    return 0
  fi

  echo "Downloading and converting $srr_id to FASTQ format from NCBI..."
  mkdir -p "$output_scratch/$srr_id" || return

  fasterq-dump --ngc "$keypath" --split-files "$srr_id" -O "$output_scratch/$srr_id" --temp "$temp_path"
  rc=$?
  if (( rc != 0 )); then
    echo "Error: fasterq-dump failed for $srr_id (exit status $rc)" >&2
    return "$rc"
  fi
}

# Make the function and the scalar settings it reads visible to the worker
# shells. exclude_ids_file is exported instead of the exclude_ids array,
# because exporting an array has no effect on child processes.
export -f download_and_convert
export output_scratch keypath temp_path exclude_ids_file

# Use GNU Parallel to run download_and_convert on two IDs at a time, feeding it
# one ID per input line.
parallel -j 2 download_and_convert < "$fastq_ids_file"
echo "Download over"