Merge pull request #119 from callahantiff/issue_118
Issue 118
callahantiff authored Oct 14, 2021
2 parents c393c15 + 34183aa commit 5c2f2a5
Showing 9 changed files with 34 additions and 30 deletions.
4 changes: 2 additions & 2 deletions builds/data_preprocessing.py
@@ -1526,11 +1526,11 @@ def _creates_relations_metadata_dict(self) -> Dict:
             [x for x in gets_object_properties(ro_graph) if '/RO_' in str(x)]
         master_synonyms = [x for x in ro_graph if 'synonym' in str(x[1]).lower() and isinstance(x[0], URIRef)]
         for x in tqdm(cls):
-            cls_label = list(ro_graph.objects(x, RDFS.label))
+            cls_label = [x for x in ro_graph.objects(x, RDFS.label) if '@' not in n3(x) or '@en' in n3(x)]
             labels = str(cls_label[0]) if len(cls_label) > 0 else 'None'
             cls_syn = [str(i[2]) for i in master_synonyms if x == i[0]]
             synonym = str(cls_syn[0]) if len(cls_syn) > 0 else 'None'
-            cls_desc = list(ro_graph.objects(x, obo.IAO_0000115))
+            cls_desc = [x for x in ro_graph.objects(x, obo.IAO_0000115) if '@' not in n3(x) or '@en' in n3(x)]
             desc = '|'.join([str(cls_desc[0])]) if len(cls_desc) > 0 else 'None'
             relation_metadata_dict[str(x)] = {'Label': labels, 'Description': desc, 'Synonym': synonym}

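The change repeated throughout this commit adds a language filter: a literal is kept when it carries no language tag at all, or when its tag is English. A minimal, self-contained sketch of the same guard in plain rdflib; the n3 function here is a stand-in for the helper this commit pulls in from pkt_kg.utils (rdflib terms expose equivalent rendering via their .n3() method):

from rdflib import Graph, Literal, URIRef, RDFS

def n3(term):
    # stand-in for the pkt_kg.utils helper: render a term in N3 notation,
    # e.g. '"interacts with"@en' for an English-tagged literal
    return term.n3()

g = Graph()
s = URIRef('http://purl.obolibrary.org/obo/RO_0002434')
g.add((s, RDFS.label, Literal('interacts with', lang='en')))   # kept: tagged @en
g.add((s, RDFS.label, Literal('interagit avec', lang='fr')))   # dropped: tagged @fr
g.add((s, RDFS.label, Literal('interacts_with')))              # kept: no tag

labels = [x for x in g.objects(s, RDFS.label)
          if '@' not in n3(x) or '@en' in n3(x)]
print(sorted(str(x) for x in labels))  # ['interacts with', 'interacts_with']

One caveat of the substring test: a plain literal whose text itself contains '@' (say, an email address) would be treated as language-tagged and dropped; for ontology labels and definitions that case is rare.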
2 changes: 1 addition & 1 deletion builds/data_to_download.txt
@@ -13,7 +13,7 @@ http://purl.obolibrary.org/obo/ro.owl

 # linked open data
 # human transcript, gene, and protein identifier mapping
-hgnc_complete_set.txt, ftp://ftp.ebi.ac.uk/pub/databases/genenames/new/tsv/hgnc_complete_set.txt
+hgnc_complete_set.txt, http://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/tsv/hgnc_complete_set.txt
 Homo_sapiens.GRCh38.102.gtf, ftp://ftp.ensembl.org/pub/release-102/gtf/homo_sapiens/Homo_sapiens.GRCh38.102.gtf.gz
 Homo_sapiens.GRCh38.102.uniprot.tsv, ftp://ftp.ensembl.org/pub/release-102/tsv/homo_sapiens/Homo_sapiens.GRCh38.102.uniprot.tsv.gz
 Homo_sapiens.GRCh38.102.entrez.tsv, ftp://ftp.ensembl.org/pub/release-102/tsv/homo_sapiens/Homo_sapiens.GRCh38.102.entrez.tsv.gz
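For context, each non-comment line in data_to_download.txt pairs a target filename with its source URL, separated by a comma. A hedged sketch of how such a line could be split; the comma parsing shown is illustrative, not necessarily pkt_kg's actual loader:

line = 'hgnc_complete_set.txt, http://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/tsv/hgnc_complete_set.txt'

if not line.startswith('#') and line.strip():   # '#' lines are comments
    filename, url = (part.strip() for part in line.split(',', 1))
    print(filename)  # hgnc_complete_set.txt
    print(url)       # http://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/tsv/hgnc_complete_set.txt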
6 changes: 3 additions & 3 deletions notebooks/Data_Preparation.ipynb
@@ -988,7 +988,7 @@
    "outputs": [],
    "source": [
     "# download data\n",
-    "url = 'ftp://ftp.ebi.ac.uk/pub/databases/genenames/new/tsv/hgnc_complete_set.txt'\n",
+    "url = 'http://ftp.ebi.ac.uk/pub/databases/genenames/new/tsv/hgnc_complete_set.txt'\n",
     "if not os.path.exists(unprocessed_data_location + 'hgnc_complete_set.txt'):\n",
     "    data_downloader(url, unprocessed_data_location)\n",
     "\n",
@@ -4278,13 +4278,13 @@
     "\n",
     "for x in tqdm(cls):\n",
     "    # labels\n",
-    "    cls_label = list(ro_graph.objects(x, RDFS.label))\n",
+    "    cls_label = [x for x in ro_graph.objects(x, RDFS.label) if '@' not in n3(x) or '@en' in n3(x)]\n",
     "    labels = str(cls_label[0]) if len(cls_label) > 0 else 'None'\n",
     "    # synonyms\n",
     "    cls_syn = [str(i[2]) for i in master_synonyms if x == i[0]]\n",
     "    synonym = str(cls_syn[0]) if len(cls_syn) > 0 else 'None'\n",
     "    # description\n",
-    "    cls_desc = list(ro_graph.objects(x, obo.IAO_0000115))\n",
+    "    cls_desc = [x for x in ro_graph.objects(x, obo.IAO_0000115) if '@' not in n3(x) or '@en' in n3(x)]\n",
     "    desc = '|'.join([str(cls_desc[0])]) if len(cls_desc) > 0 else 'None'\n",
     "    \n",
     "    relation_metadata_dict[str(x)] = {\n",
4 changes: 2 additions & 2 deletions notebooks/OWLNETS_Example_Application.ipynb
@@ -321,7 +321,7 @@
    "outputs": [],
    "source": [
     "ont_classes = pkt.utils.gets_ontology_classes(graph)\n",
-    "ont_labels = {str(x[0]): str(x[2]) for x in list(graph.triples((None, RDFS.label, None)))}\n",
+    "ont_labels = {str(x[0]): str(x[2]) for x in list(graph.triples((None, RDFS.label, None))) if '@' not in pkt.utils.n3(x[2]) or '@en' in pkt.utils.n3(x[2])}\n",
     "ont_synonyms = pkt.utils.gets_ontology_class_synonyms(graph)\n",
     "ont_dbxrefs = pkt.utils.gets_ontology_class_dbxrefs(graph)\n",
     "ont_defs = pkt.utils.gets_ontology_definitions(graph)"
@@ -377,7 +377,7 @@
    "source": [
     "for obj in tqdm(ont_objects):\n",
     "    # get object label\n",
-    "    label_hits = list(graph.objects(obj, RDFS.label))\n",
+    "    label_hits = [x for x in graph.objects(obj, RDFS.label) if '@' not in pkt.utils.n3(x) or '@en' in pkt.utils.n3(x)]\n",
     "    label = str(label_hits[0]) if len(label_hits) > 0 else 'None'\n",
     "    \n",
     "    # get object namespace\n",
2 changes: 1 addition & 1 deletion pkt_kg/__version__.py
@@ -1,2 +1,2 @@
 """Current version of package pkt_kg"""
-__version__ = "3.0.1"
+__version__ = "3.0.2"
8 changes: 5 additions & 3 deletions pkt_kg/metadata.py
@@ -127,9 +127,11 @@ def extract_metadata(self, graph: Graph) -> None:
         for key, entities in domains:
             temp_dict = dict()
             for i in tqdm(entities):
-                labels = [x for x in list(graph.triples((i, RDFS.label, None)))]
-                descriptions = [x for x in list(graph.triples((i, obo.IAO_0000115, None)))]
-                synonyms = [x for x in list(graph.triples((i, None, None))) if 'synonym' in str(x[1]).lower()]
+                labels = [x for x in graph.triples((i, RDFS.label, None))
+                          if '@' not in n3(x[2]) or '@en' in n3(x[2])]
+                descriptions = [x for x in graph.triples((i, obo.IAO_0000115, None))
+                                if '@' not in n3(x[2]) or '@en' in n3(x[2])]
+                synonyms = [x for x in graph.triples((i, None, None)) if 'synonym' in str(x[1]).lower()]
                 if len(labels) != 0:
                     temp_dict[str(i)] = {
                         'Label': str(labels[0][2]) if len(labels) > 0 else None,
4 changes: 2 additions & 2 deletions pkt_kg/utils/data_utils.py
@@ -197,10 +197,10 @@ def data_downloader(url: str, write_location: str, filename: str = '') -> None:
     file = re.sub(zip_pat, '', filename) if filename != '' else re.sub(zip_pat, '', url.split('/')[-1])
     if '.zip' in url: zipped_url_download(url, write_location, file)
     elif '.gz' in url or '.gz' in filename:
-        if 'ftp' in url: gzipped_ftp_url_download(url, write_location, file)
+        if url.startswith('ftp'): gzipped_ftp_url_download(url, write_location, file)
         else: gzipped_url_download(url, write_location, file)
     else:
-        if 'ftp' in url: ftp_url_download(url, write_location, file)
+        if url.startswith('ftp'): ftp_url_download(url, write_location, file)
         else: url_download(url, write_location, file)

     return None
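The dispatch fix above matters because the old substring check misrouted HTTP URLs whose hostname merely contains 'ftp', such as the updated HGNC link. A quick illustration of the difference between the two checks:

http_url = 'http://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/tsv/hgnc_complete_set.txt'
ftp_url = 'ftp://ftp.ensembl.org/pub/release-102/gtf/homo_sapiens/Homo_sapiens.GRCh38.102.gtf.gz'

# old check: the substring match fires on the hostname, so this HTTP file
# was wrongly handed to the FTP downloader
print('ftp' in http_url)             # True

# new check: only a genuine ftp:// scheme selects the FTP code path
print(http_url.startswith('ftp'))    # False
print(ftp_url.startswith('ftp'))     # True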
4 changes: 3 additions & 1 deletion pkt_kg/utils/kg_utils.py
@@ -53,6 +53,7 @@

 from tqdm import tqdm  # type: ignore
 from typing import Dict, List, Optional, Set, Tuple, Union
+from pkt_kg.utils import *

 # set-up environment variables
 obo = Namespace('http://purl.obolibrary.org/obo/')
@@ -94,7 +95,8 @@ def gets_ontology_definitions(graph: Graph) -> Dict:
         ...}
     """

-    obj_defs = {x[0]: x[2] for x in graph.triples((None, obo.IAO_0000115, None))}
+    obj_defs = {x[0]: x[2] for x in graph.triples((None, obo.IAO_0000115, None))
+                if '@' not in n3(x[2]) or '@en' in n3(x[2])}

     return obj_defs

30 changes: 15 additions & 15 deletions tests/test_data_utils_downloading.py
@@ -40,7 +40,7 @@ def setUp(self):

         # set some urls
         self.url = 'https://proconsortium.org/download/current/promapping.txt'
-        self.ftp_url = 'ftp://ftp.ebi.ac.uk/pub/databases/genenames/new/tsv/hgnc_complete_set.txt'
+        self.ftp_url = 'http://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/tsv/hgnc_complete_set.txt'
         self.gzipped_ftp_url = 'ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz'
         self.zipped_url = 'https://reactome.org/download/current/ReactomePathways.gmt.zip'
         self.gzipped_url = 'https://www.disgenet.org/static/disgenet_ap1/files/downloads/disease_mappings.tsv.gz'
@@ -259,23 +259,23 @@ def test_data_downloader(self):
         data_downloader(self.url, self.write_location)
         self.assertTrue(os.path.exists(self.write_location + self.url.split('/')[-1]))

-        # # ftp url data
+        # ftp url data
         data_downloader(self.ftp_url, self.write_location)
         self.assertTrue(os.path.exists(self.write_location + self.ftp_url.split('/')[-1]))

-        # gzipped ftp url data
-        file = self.gzipped_ftp_url.replace('ftp://', '').split('/')[-1]
-        write_loc = self.write_location + '{filename}'.format(filename=file)
-        data_downloader(self.gzipped_ftp_url, self.write_location)
-        self.assertTrue(os.path.exists(os.path.exists(write_loc[:-3])))
-
-        # zipped data
-        data_downloader(self.zipped_url, self.write_location)
-        self.assertTrue(os.path.exists(self.write_location + self.zipped_url.split('/')[-1][:-4]))
-
-        # gzipped data
-        data_downloader(self.gzipped_url, self.write_location)
-        self.assertTrue(os.path.exists(self.write_location + self.gzipped_url.split('/')[-1][:-3]))
+        # # gzipped ftp url data
+        # file = self.gzipped_ftp_url.replace('ftp://', '').split('/')[-1]
+        # write_loc = self.write_location + '{filename}'.format(filename=file)
+        # data_downloader(self.gzipped_ftp_url, self.write_location)
+        # self.assertTrue(os.path.exists(os.path.exists(write_loc[:-3])))
+        #
+        # # zipped data
+        # data_downloader(self.zipped_url, self.write_location)
+        # self.assertTrue(os.path.exists(self.write_location + self.zipped_url.split('/')[-1][:-4]))
+        #
+        # # gzipped data
+        # data_downloader(self.gzipped_url, self.write_location)
+        # self.assertTrue(os.path.exists(self.write_location + self.gzipped_url.split('/')[-1][:-3]))

         return None

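With the zipped and gzipped download cases commented out above, the routing logic can still be exercised offline. The sketch below is not part of this commit: it patches the downloader helpers with unittest.mock to assert the dispatch that the startswith fix establishes, and it assumes those helpers resolve as names inside pkt_kg.utils.data_utils (the module this diff edits):

import unittest
from unittest import mock

from pkt_kg.utils.data_utils import data_downloader


class TestDownloaderDispatch(unittest.TestCase):

    @mock.patch('pkt_kg.utils.data_utils.url_download')
    @mock.patch('pkt_kg.utils.data_utils.ftp_url_download')
    def test_http_host_named_ftp(self, ftp_dl, plain_dl):
        # hostname contains 'ftp' but the scheme is http, so after the fix
        # the plain url_download branch should run
        url = 'http://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/tsv/hgnc_complete_set.txt'
        data_downloader(url, '/tmp/')
        plain_dl.assert_called_once()
        ftp_dl.assert_not_called()

    @mock.patch('pkt_kg.utils.data_utils.gzipped_url_download')
    @mock.patch('pkt_kg.utils.data_utils.gzipped_ftp_url_download')
    def test_gzipped_ftp_scheme(self, gz_ftp_dl, gz_plain_dl):
        # a real ftp:// scheme with a .gz suffix should take the gzipped FTP path
        url = 'ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz'
        data_downloader(url, '/tmp/')
        gz_ftp_dl.assert_called_once()
        gz_plain_dl.assert_not_called()


if __name__ == '__main__':
    unittest.main()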
