-
Notifications
You must be signed in to change notification settings - Fork 3
Snippet: Checking for dead links to Phosphosite Plus
krassowski edited this page Jun 16, 2017 · 2 revisions
import sys
import requests
from requests.exceptions import ConnectTimeout, ReadTimeout
from tqdm import tqdm
from app import create_app
from models import Protein
# URL pattern for a PhosphoSitePlus lookup; ``{accession}`` is filled in
# with a UniProt accession when main() builds each link.
template = 'http://www.phosphosite.org/uniprotAccAction?id={accession}'

# Create the application with LOAD_STATS switched off.
# NOTE(review): presumably this is what makes the ``Protein`` model
# queryable from a standalone script - confirm against ``app.create_app``.
create_app(config_override=dict(LOAD_STATS=False))
def request(url, times=5):
    """Fetch *url* with a GET request, retrying when the request times out.

    Args:
        url: Address to fetch.
        times: Number of retries allowed after the first attempt, so at
            most ``times + 1`` requests are sent. Zero or a negative value
            means a single attempt.

    Returns:
        The ``requests`` response on success, or ``None`` when every
        attempt ended in a connect or read timeout.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status. Such responses are not retried.
    """
    # Clamp so that a negative ``times`` cannot lead to endless retrying.
    attempts = max(times, 0) + 1
    for _ in range(attempts):
        try:
            r = requests.get(url, timeout=1)
        except (ConnectTimeout, ReadTimeout):
            # Timed out: try again if any attempts are left.
            continue
        # Does nothing for a successful response; raises HTTPError otherwise,
        # which deliberately propagates to the caller.
        r.raise_for_status()
        return r
    return None
def main():
    """Check the PhosphoSitePlus link of every UniProt entry of every protein.

    Iterates over all ``Protein`` rows, builds a link from ``template`` for
    each UniProt accession and fetches it with ``request``. Progress is shown
    with ``tqdm`` and a short summary is printed at the end.

    Returns:
        A dict with four lists:
        ``'not_found'`` - ``(gene name, refseq, accession)`` for pages that
        contain the "no records" message;
        ``'no_uniprot'`` - ``(gene name, refseq)`` of proteins without
        UniProt entries;
        ``'no_externals'`` - ``(gene name, refseq)`` of proteins without
        external references;
        ``'not_found_and_reviewed'`` - the subset of ``'not_found'`` whose
        UniProt entry is flagged as reviewed.
    """
    missing_marker = 'No records found that match the search criteria'
    checked = 0
    unreachable = 0
    report = {
        'not_found': [],
        'no_uniprot': [],
        'no_externals': [],
        'not_found_and_reviewed': [],
    }

    for protein in tqdm(Protein.query.all()):
        label = (protein.gene.name, protein.refseq)

        references = protein.external_references
        if not references:
            report['no_externals'].append(label)
            continue

        entries = references.uniprot_entries
        if not entries:
            report['no_uniprot'].append(label)
            continue

        for uniprot in entries:
            url = template.format(accession=uniprot.accession)
            response = request(url)

            # A falsy result means the page could not be fetched at all.
            if not response:
                print('Unable to connect for %s (%s)' % label)
                unreachable += 1
                continue

            if missing_marker in response.text:
                record = label + (uniprot.accession,)
                report['not_found'].append(record)
                if uniprot.reviewed:
                    report['not_found_and_reviewed'].append(record)
                    print('Reviewed but not found: %s (%s)' % label)
                    print(url)

            checked += 1

    print('Tested %s' % checked)
    print('Skipped %s' % unreachable)
    return report
# Run the check right away and keep the returned report dict in a
# module-level name so it can be inspected afterwards.
results = main()