-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathupdate_clingen.sh
70 lines (56 loc) · 4.16 KB
/
update_clingen.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env bash
# Download the latest ClinGen files into the current directory.
# For each file, wget -N is attempted first with its stderr discarded; when
# wget fails, curl fetches the same URL instead, and the script stops with
# status 1 if curl fails as well.
clingen_ftp="ftp://ftp.clinicalgenome.org"
for clingen_file in \
  ClinGen_haploinsufficiency_gene_GRCh37.bed \
  ClinGen_haploinsufficiency_gene_GRCh38.bed \
  ClinGen_triplosensitivity_gene_GRCh37.bed \
  ClinGen_triplosensitivity_gene_GRCh38.bed \
  ClinGen_region_curation_list_GRCh37.tsv \
  ClinGen_region_curation_list_GRCh38.tsv
do
  if ! wget -N "${clingen_ftp}/${clingen_file}" 2>/dev/null; then
    curl -o "${clingen_file}" "${clingen_ftp}/${clingen_file}" || exit 1
  fi
done
# Parse each region curation TSV into BED with parse_clingen_tsv.py.
# The script stops with status 1 as soon as one conversion fails.
for genome_build in GRCh37 GRCh38; do
  python3 parse_clingen_tsv.py --infile "ClinGen_region_curation_list_${genome_build}.tsv" || exit 1
done
# Move the resulting files into Resources/<genome>/, dropping the build suffix
# from each file name, then delete the region curation TSV files.
# GRCh37 files go to Resources/hg19 and GRCh38 files go to Resources/hg38.
# Any failed move stops the script with status 1; the final rm is unchecked.

# Gene-level BED files, in the order HI then TS for each build.
for build_and_genome in "GRCh37 hg19" "GRCh38 hg38"; do
  build=${build_and_genome% *}
  genome=${build_and_genome#* }
  for dosage_type in haploinsufficiency triplosensitivity; do
    mv "ClinGen_${dosage_type}_gene_${build}.bed" \
      "Resources/${genome}/ClinGen_${dosage_type}_gene.bed" || exit 1
  done
done

# Region-level BED files named <tsv-stem>.HI.bed and <tsv-stem>.TS.bed.
for build_and_genome in "GRCh37 hg19" "GRCh38 hg38"; do
  build=${build_and_genome% *}
  genome=${build_and_genome#* }
  for dosage_suffix in HI TS; do
    mv "ClinGen_region_curation_list_${build}.${dosage_suffix}.bed" \
      "Resources/${genome}/ClinGen_region_curation_list.${dosage_suffix}.bed" || exit 1
  done
done

# The TSV inputs are no longer needed once the BED files are in place.
for build in GRCh37 GRCh38; do
  rm "ClinGen_region_curation_list_${build}.tsv"
done
# Build Benign_TS_region_genelist.bed inside each Resources/<genome> directory.
# For every genome directory:
#   1. intersect ClinGen_region_curation_list.TS.bed with
#      refGenes.parsed.SelectTranscript.bed (bedtools intersect -wo), keep the
#      rows whose 4th column equals 40 and that contain "NM_", and write
#      columns 1-3 and 9 to Benign_TS_region_genelist.regions.bed;
#   2. keep the rows of ClinGen_triplosensitivity_gene.bed whose 5th column
#      equals 40 and write columns 1-4 to Benign_TS_region_genelist.genes.bed;
#   3. concatenate both files into Benign_TS_region_genelist.bed.
# Each build runs in a subshell: a failed cd aborts the script instead of
# letting the pipelines write into the wrong directory, and the working
# directory of the main script is never changed, so no cd back is needed.
# The script also stops if the combined list cannot be written.
for genome_dir in Resources/hg19 Resources/hg38; do
  (
    cd "${genome_dir}" || exit 1
    bedtools intersect -a ClinGen_region_curation_list.TS.bed -b refGenes.parsed.SelectTranscript.bed -wo |
      awk '($4 == 40)' | grep 'NM_' | cut -f1-3,9 > Benign_TS_region_genelist.regions.bed
    awk '($5 == 40)' ClinGen_triplosensitivity_gene.bed | cut -f1-4 > Benign_TS_region_genelist.genes.bed
    cat Benign_TS_region_genelist.regions.bed Benign_TS_region_genelist.genes.bed > Benign_TS_region_genelist.bed
  ) || exit 1
done
# Check that every expected output file exists and is not empty.
# Each missing or empty file is named on stderr. If any check fails, the
# summary error is printed to stderr and the script exits with status 1, so
# callers can detect a failed update from the exit status.
update_failed=0
for resource_file in \
  Resources/hg19/ClinGen_region_curation_list.HI.bed \
  Resources/hg19/ClinGen_region_curation_list.TS.bed \
  Resources/hg38/ClinGen_region_curation_list.HI.bed \
  Resources/hg38/ClinGen_region_curation_list.TS.bed \
  Resources/hg19/ClinGen_haploinsufficiency_gene.bed \
  Resources/hg19/ClinGen_triplosensitivity_gene.bed \
  Resources/hg38/ClinGen_haploinsufficiency_gene.bed \
  Resources/hg38/ClinGen_triplosensitivity_gene.bed \
  Resources/hg19/Benign_TS_region_genelist.bed \
  Resources/hg38/Benign_TS_region_genelist.bed
do
  if [ ! -s "${resource_file}" ]; then
    echo "Missing or empty file: ${resource_file}" >&2
    update_failed=1
  fi
done

if [ "${update_failed}" -ne 0 ]; then
  echo "There was an error when updating the ClinGen files." >&2
  exit 1
fi
echo "The ClinGen files are updated."