Skip to content

Commit

Permalink
Merge pull request #116 from WGLab/improvements
Browse files Browse the repository at this point in the history
Improvements to API timeout, clarifications, memory issues
  • Loading branch information
jimhavrilla authored May 13, 2021
2 parents bab6d45 + 440f2a1 commit 3ea938a
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 21 deletions.
40 changes: 30 additions & 10 deletions API.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,10 @@ def generate_cohd_list(HPOquery):
'domain': "Condition", # can use "Drug" for drugs
'min_count': 1
}
rsearch=requests.get("http://cohd.io/api/omop/findConceptIDs", params=params)
try:
rsearch=requests.get("http://cohd.io/api/omop/findConceptIDs", params=params, timeout=10)
except requests.exceptions.Timeout: # if the resource is timing out after 10 sec, not worth the load.
return []
if rsearch.status_code == requests.status_codes.codes.OK:
results = rsearch.json()
results=sorted(results['results'], key=lambda k: k['concept_count'], reverse=True)
Expand All @@ -306,7 +309,13 @@ def cohd_page(concept_id):
'concept_id': concept_id,
'dataset_id': 4, # lifetime non-hierarchical is 2, 4 is temporal beta
}
rsearch=requests.get("http://cohd.io/api/omop/conceptAncestors", params=params)
headers=generate_headers()
headers={"COHDC": headers['COHDC'], "COHDA": headers['COHDA']}
try:
rsearch=requests.get("http://cohd.io/api/omop/conceptAncestors", params=params)
except requests.exceptions.Timeout:
ancestors, conditions, drugs, procedures = [], [], [], []
return ancestors, conditions, drugs, procedures, headers
if rsearch.status_code == requests.status_codes.codes.OK:
results = rsearch.json()
ancestors = sorted(results['results'], key=lambda k: k['concept_count'], reverse=True)
Expand Down Expand Up @@ -335,8 +344,6 @@ def cohd_page(concept_id):
conditions = results['Condition']
drugs = results['Drug']
procedures = results['Procedure']
headers=generate_headers()
headers={"COHDC": headers['COHDC'], "COHDA": headers['COHDA']}
return ancestors, conditions, drugs, procedures, headers

# kegg page generator
Expand Down Expand Up @@ -372,13 +379,19 @@ def kegg_page(phenname):
# pubmed page generator
def literature_page(HPOquery):
pubmed={}
headers=generate_headers()
headers={"Pubmed": headers['Pubmed']}
params1={
'db': 'pubmed',
'term': HPOquery,
'retmax': '200',
'api_key': '1ee2a8a8bf1b1b2b09e8087eb5cf16c95109',
'sort': 'relevance'}
rsearch=requests.get("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi", params=params1)
try:
rsearch=requests.get("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi", params=params1, timeout=45)
except requests.exceptions.Timeout:
pubmed = {}
return pubmed, headers
def generate_citations(uid):
params2={
'retmode': "json",
Expand Down Expand Up @@ -455,8 +468,6 @@ def generate_citations(uid):
if title:
publication = title + " " + authors + ". " + journal + " " + pubdate + volume + issue + pages + ". " + doi
pubmed[id1]=[publication,top25[id1]]
headers=generate_headers()
headers={"Pubmed": headers['Pubmed']}
return pubmed, headers

# tocris drugs page generator
Expand Down Expand Up @@ -657,7 +668,10 @@ def generate_nihfoa_list(HPOquery):
'type': "active",
}
# need to add https://grants.nih.gov/grants/guide/pa-files/results['filename']
rsearch=requests.get("https://search.grants.nih.gov/guide/api/data", params=params)
try:
rsearch=requests.get("https://search.grants.nih.gov/guide/api/data", params=params, timeout=30)
except requests.exceptions.Timeout:
return []
if rsearch.status_code == requests.status_codes.codes.OK:
results = rsearch.json()['data']['hits']['hits']
#print(results[0]["_source"].keys()) # we want 'title', 'docnum', 'primaryIC', 'sponsors', 'opendate', 'appreceiptdate', 'expdate' 'filename'
Expand All @@ -676,7 +690,10 @@ def generate_nihreporter_list(HPOquery):
}
payload = "&".join("%s=%s" % (k,v) for k,v in params.items())
# https://api.federalreporter.nih.gov/v1/projects/search?query=text:cleft+palate$fy:2015,2016,2017,2018,2019,2020&searchMode=Smart
rsearch=requests.get("https://api.federalreporter.nih.gov/v1/projects/search", params=payload)
try:
rsearch=requests.get("https://api.federalreporter.nih.gov/v1/projects/search", params=payload, timeout=30)
except requests.exceptions.Timeout:
return []
if rsearch.status_code == requests.status_codes.codes.OK:
results = rsearch.json()['items']
#print(results[0].keys())
Expand Down Expand Up @@ -813,7 +830,10 @@ def routes_for_indication(HPOquery):

def openfda_query(params):
payload = "&".join("%s=%s" % (k,v) for k,v in params.items())
rsearch=requests.get("https://api.fda.gov/drug/event.json", params=payload)
try:
rsearch=requests.get("https://api.fda.gov/drug/event.json", params=payload, timeout=30)
except requests.exceptions.Timeout:
results = []
print(rsearch.url)
if rsearch.status_code == requests.status_codes.codes.OK:
results = rsearch.json()['results']
Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ services:
- ./elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml
environment:
- bootstrap.memory_lock=true
- "ES_JAVA_OPTS=-Xms2g -Xmx2g"
- "ES_JAVA_OPTS=-Xms750m -Xmx750m"
- discovery.type=single-node
ports: # Expose Elasticsearch ports
- "9300:9300"
Expand Down
4 changes: 2 additions & 2 deletions static/text/headers.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ HPOPatient HPO Term The full name of the Human Phenotype Ontology term matching
COHDC Concept 2 ID The unique ID of the OHDSI term related to the search term through co-occurrence in the COHD database. Corresponds to one OHDSI term.
COHDC Concept 2 Domain The OHDSI domain for the related term, a condition, drug, or procedure.
COHDC Concept 2 Name This is the name of the OHDSI term in the Columbia Open Health Data database related to the search term through co-occurrence in patient notes. The term is unique because COHD only uses filtered SNOMED terms as the patient data comes from Columbia University Medical Center, at least for the time being. This data is always up to date, as it comes directly from the API.
COHDC Chi-Square value This value is calculated using a 2x2 contigency table with four values: The number of patients with the first term and the second term, the number of patients without the first term but with the second term, the number of patients who have the first term but not the second term, and the number of patients that don't have the either term (which is a very large number, usually). The samples must be random, and the observations independent of one another, which they mostly are. The one disadvantage of this calculation is it is artificially inflated by the fourth number.
COHDC Chi-Square value This value is calculated using a 2x2 contingency table with four values: The number of patients with the first term and the second term, the number of patients without the first term but with the second term, the number of patients who have the first term but not the second term, and the number of patients that don't have either term (often a very large number). The samples must be random, and the observations independent of one another, which they mostly are. The one disadvantage of this calculation is it is artificially inflated by the fourth number.
COHDC p-value This is the p-value, or probability value, of the chi-square test mentioned on the left: the probability of obtaining results at least as extreme as those observed by random chance, assuming the null hypothesis is correct.
COHDC Adjusted p-value This is the Bonferroni corrected p-value based on the number of chi-square concept comparisons that are currently possible in COHD, which is ~56.8 million comparisons (5.6848043e7 exactly).
COHDC Concept 1 ID The unique ID of your original search term in OHDSI, in the COHD database. Corresponds to one OHDSI term.
Expand Down Expand Up @@ -127,7 +127,7 @@ PharosFacets Name The name of the facet (Gene Ontology term, pathway, disease, e
PharosFacets Number of Targets How many targets for the disease are attributable to the term.
PharosTargets Gene Name The full official gene name of the drug target.
PharosTargets Gene Symbol The HGNC gene symbol of the drug target.
PharosTargets UniProt ID The UniProt identifier of the drug target which can be used to search Pharos or Uniprot for more information.
PharosTargets UniProt ID The UniProt identifier of the drug target which can be used to search Pharos or UniProt for more information.
PharosTargets Data Type and Source Where the data for the drug target came from, and if applicable, the data subtype from that source.
PharosTargets Evidence This could be a few PubMed citations, an author statement, SNPs, personally curated data, or marker/mechanism evidence.
PharosTD Name The full name of the gene.
Expand Down
6 changes: 3 additions & 3 deletions templates/layout.html
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,9 @@ <h5 class="modal-title">Contact PhenCards</h5>
<!-- container -->
<div class="container">
<div class="footer col-lg-offset-1">
<p class="text-muted">All Rights Reserved @ Wang Genomics Lab 2020-
<script type="text/javascript"> document.write(new Date().getFullYear());
</script>
<p class="text-muted">All Rights Reserved @ Wang Genomics Lab 2020 -
<script type="text/javascript">document.write(new Date().getFullYear()+".");
</script> This information, while from reputable sources, is not to be taken as medical advice, and we accept no liability for its use as such. Always discuss with a qualified medical provider.
</p>
</div>
</div>
Expand Down
2 changes: 1 addition & 1 deletion templates/patient.html
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ <h1><span style="color: #efa50e; "><b>Clinical Trials</b></span></h1>
<div class="container medium" id="Literature">
<h1><span style="color: #efa50e; "><b>Literature</b></span></h1>
<br><br>
<tr><th><span style="color: black;"> Using this unique tool, you can search for related literature for your extracted HPO terms with great ease in <a target="_blank" href="https://scholar.google.com">Google Scholar</a>. The default search includes all the exact terms separated by ORs. To increase specificity, remove the ORs (if you remove the ORs and use just the first 3 default terms you will get the paper we cited for the clinical notes). You can remove terms, move the cursor freely, and add duplicate terms. </span></th></tr>
<tr><th><span style="color: black;"> Using this unique tool, you can search for related literature for your extracted HPO terms with great ease in <a target="_blank" href="https://scholar.google.com">Google Scholar</a>. The default search includes all the exact terms separated by ORs. To increase specificity, remove the ORs (if you remove the ORs and use just the first 3 default terms you will get the paper we cited for the clinical notes). You can remove terms with Backspace, move the cursor left and right with the arrow keys, and add duplicate terms. </span></th></tr>
<br><br>
<p>
{% set tags = HPOnames|join(",OR,") %}
Expand Down
4 changes: 2 additions & 2 deletions templates/pharos.html
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ <h2 style="text-align: center; margin-top: 80px; font-family:'Roboto', sans-seri
<li> Gene Ontology Processes in targets related to the term
<li> Gene Ontology Components in targets related to the term
<li> Gene Ontology Functions in targets related to the term
<li> Uniprot diseases linked to the disease search term
<li> Cell tissue types expressed most in Uniprot tissue data in related targets to the term
<li> UniProt diseases linked to the disease search term
<li> Cell tissue types expressed most in UniProt tissue data in related targets to the term
</ul>
</p>
</div>
Expand Down
4 changes: 2 additions & 2 deletions templates/results.html
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ <h1><span style="color: #efa50e; "><b>Genes</b></span></h1>
No results for the first ranked HPO term in Phen2Gene.
{% endif %}
<hr class="innerbar">
<tr><th><span style="color: black; ">Here are the drug targets (genes) from the <a target="_blank" href="https://pharos.nih.gov/api">Pharos database</a><sup><a href="#pharos-cite">7</a></sup>. If you click on the gene symbol, you will go to a page that summarizes Pharos information related to the drug target, linked from DrugCentral and other resources, such as ligands and ligand information, or log2 fold change expression data by tissue and cell type. The Uniprot ID link will take you to Pharos directly, so you can explore the data further there.</span></th></tr>
<tr><th><span style="color: black; ">Here are the drug targets (genes) from the <a target="_blank" href="https://pharos.nih.gov/api">Pharos database</a><sup><a href="#pharos-cite">7</a></sup>. If you click on the gene symbol, you will go to a page that summarizes Pharos information related to the drug target, linked from DrugCentral and other resources, such as ligands and ligand information, or log2 fold change expression data by tissue and cell type. The UniProt ID link will take you to Pharos directly, so you can explore the data further there.</span></th></tr>
{% if pharos %}
{% set url = "https://pharos.nih.gov/targets/" %}
<table id="pharos" style="white-space: pre-line" class="table table-striped table-bordered table-sm">
Expand Down Expand Up @@ -591,7 +591,7 @@ <h1><span style="color: #efa50e; "><b>Support & Collaboration</b></span></h1>
No results for the search term in the Federal Reporter database (or API may be down).
{% endif %}
<hr class="innerbar">
<tr><th><span style="color: black; ">These are experts in various medical institutions that are likely to specialize directly in your phenotype from <a target="_blank" href="http://direct2experts.org">Direct2Experts</a><sup><a href="#d2e-cite">18</a></sup>. You can click on the "Number of Experts" links to reach each respective academic medical institutions results pages that are in this network. You can find MDs, PhDs, BS, MS and more specialized in your phenotypic search term.</span></th></tr>
<tr><th><span style="color: black; ">These are experts from <a target="_blank" href="http://direct2experts.org">Direct2Experts</a><sup><a href="#d2e-cite">18</a></sup> at various medical institutions who are likely to specialize directly in your phenotype. You can click on the "Number of Experts" links to reach the results page of each academic medical institution in this network. You can find MDs, PhDs, genetic counselors, BS and MS researchers, and others who specialize in your phenotypic search term.</span></th></tr>
<br><br>
<table id="d2e" style="white-space: pre-line" class="table table-striped table-bordered table-sm">
</table>
Expand Down

0 comments on commit 3ea938a

Please sign in to comment.