Skip to content

Commit

Permalink
Merge pull request #30 from geneontology/fix_goref
Browse files Browse the repository at this point in the history
add some download retries here
  • Loading branch information
sierra-moxon authored Feb 13, 2024
2 parents 6237a2e + a16d119 commit ce0df9b
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 9 deletions.
2 changes: 1 addition & 1 deletion src/config/download_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ GOA_taxon_10090_ISOFORM:
url: https://ftp.ebi.ac.uk/pub/databases/GO/goa/MOUSE/goa_mouse_isoform.gaf.gz
# TODO: change this to skyhook when running through the pipeline
MGI_NOCTUA:
url: http://snapshot.geneontology.org/products/upstream_and_raw_data/noctua_mgi.gpad.gz
url: http://snapshot.geneontology.org/products/upstream_and_raw_data/noctua_mgi-src.gpad.gz
10 changes: 5 additions & 5 deletions src/gopreprocess/goa_annotation_creation_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from src.gopreprocess.file_processors.gaf_processor import GafProcessor
from src.gopreprocess.file_processors.gpi_processor import GpiProcessor
from src.utils.decorators import timer
from src.utils.download import download_file
from src.utils.download import download_file, download_with_retry


def generate_annotation(
Expand Down Expand Up @@ -41,9 +41,9 @@ def generate_annotation(
# PR:Q9DAQ4-1 = UniProtKB:Q9DAQ4-1
if isoform:
pr_id = protein_xrefs[str(annotation.subject.id)]
print("pr_id", pr_id)
print("parent_xrefs", parent_xrefs[pr_id])
print("annotation.subject.id", annotation.subject.id)
# print("pr_id", pr_id)
# print("parent_xrefs", parent_xrefs[pr_id])
# print("annotation.subject.id", annotation.subject.id)
# PR:Q9DAQ4-1 = MGI:MGI:1918911
mgi_id = parent_xrefs[pr_id]
new_gene = Curie(namespace=mgi_id.split(":")[0], identity=mgi_id.replace("MGI:MGI:", "MGI:"))
Expand Down Expand Up @@ -89,7 +89,7 @@ def get_source_annotations(
:rtype: tuple[dict, Any]
"""
taxon = taxon.replace("NCBITaxon:", "taxon_")
p2go_file = download_file(target_directory_name=f"GOA_{taxon}", config_key=f"GOA_{taxon}", gunzip=True)
p2go_file = download_with_retry(target_directory_name=f"GOA_{taxon}", config_key=f"GOA_{taxon}", gunzip=True)

target_gpi_path = download_file(target_directory_name="MGI_GPI", config_key="MGI_GPI", gunzip=True)

Expand Down
23 changes: 20 additions & 3 deletions src/utils/download.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""Module contains functions for downloading files from the web."""

from pathlib import Path

from urllib.error import URLError
import time
import pystow

from src.utils.decorators import timer
Expand Down Expand Up @@ -32,6 +33,18 @@ def download_files(source_taxon: str, target_taxon: str) -> tuple[Path, Path, Pa
return ortho_path, source_gaf_path, target_gpi_path


def download_with_retry(
    target_directory_name: str,
    config_key: str,
    gunzip: bool = True,
    retries: int = 3,
    delay: float = 5,
) -> Path:
    """
    Download a file via ``download_file``, retrying when an attempt fails.

    :param target_directory_name: Directory name forwarded to ``download_file``.
    :param config_key: Download-config key forwarded to ``download_file``.
    :param gunzip: Forwarded to ``download_file``; defaults to True here.
    :param retries: Maximum number of download attempts.
    :param delay: Seconds to wait between two consecutive attempts.
    :return: Whatever ``download_file`` returns on the first successful attempt.
    :raises Exception: When every attempt failed; the last underlying error is
        chained as ``__cause__`` so the root cause stays visible in tracebacks.
    """
    last_error = None
    for attempt in range(1, retries + 1):
        try:
            return download_file(target_directory_name, config_key, gunzip)
        except Exception as e:  # Broad exception catch due to the abstraction of download details
            last_error = e
            if attempt < retries:
                print(f"Download failed on attempt {attempt} due to: {e}. Retrying...")
                # Only wait when another attempt will actually follow.
                time.sleep(delay)
            else:
                print(f"Download failed on attempt {attempt} due to: {e}.")
    raise Exception(f"Failed to download file after {retries} attempts.") from last_error


def download_file(target_directory_name: str, config_key: str, gunzip=False) -> Path:
"""
Downloads a file from the given URL.
Expand All @@ -42,9 +55,13 @@ def download_file(target_directory_name: str, config_key: str, gunzip=False) ->
"""
if gunzip:
file_path = pystow.ensure_gunzip(target_directory_name, url=get_url(config_key), force=True)
file_path = pystow.ensure_gunzip(target_directory_name,
url=get_url(config_key),
force=True)
else:
file_path = pystow.ensure(target_directory_name, url=get_url(config_key), force=True)
file_path = pystow.ensure(target_directory_name,
url=get_url(config_key),
force=True)
return file_path


Expand Down

0 comments on commit ce0df9b

Please sign in to comment.