Skip to content

Commit

Permalink
Merge pull request #208 from VirtualFlyBrain/hotfix1
Browse files Browse the repository at this point in the history
Improved handling of xrefs
  • Loading branch information
Robbie1977 authored Sep 5, 2024
2 parents 9a2d963 + 5b849f8 commit 49c1b3c
Show file tree
Hide file tree
Showing 7 changed files with 921 additions and 748 deletions.
1,602 changes: 860 additions & 742 deletions docs/source/tutorials/connectomics.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/vfb_connect.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: vfb_connect
Version: 2.2.7.dev7+f46d531.dirty
Version: 2.2.10.dev8+970cf66.dirty
Summary: Wrapper for querying VirtualFlyBrain servers.
Home-page: https://github.com/VirtualFlyBrain/VFB_connect
Author: David Osumi-Sutherland
Expand Down
42 changes: 39 additions & 3 deletions src/vfb_connect/cross_server_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
from colormath.color_conversions import convert_color
from scipy.spatial import KDTree

# Lookup of accepted `db` argument values -> database identifier handed to the
# query wrapper. Callers replace `db` with the mapped value when it is a key
# here; any other value is passed through unchanged.
VFB_DBS_2_SYMBOLS = {
    # dataset-style names
    "JRC_OpticLobe": "neuprint_JRC_OpticLobe_v1_0_1",
    "FAFB": "catmaid_fafb",
    "L1EM": "catmaid_l1em",
    "MANC": "neuprint_JRC_Manc_1_2_1",
    "FlyEM-HB": "neuprint_JRC_Hemibrain_1point1",
    # short lowercase symbols
    "ol": "neuprint_JRC_OpticLobe_v1_0_1",
    "fafb": "catmaid_fafb",
    "l1em": "catmaid_l1em",
    "fw": "flywire783",
    "mv": "neuprint_JRC_Manc_1_2_1",
    "hb": "neuprint_JRC_Hemibrain_1point1",
}


def gen_short_form(iri):
"""Generate short_form (string) from an IRI string.
Expand Down Expand Up @@ -811,6 +815,12 @@ def get_terms_by_xref(self, xrefs: iter, db='', summary=True, return_dataframe=T
`return_dataframe` is `True` and `summary` is `True`.
:rtype: list of dicts or pandas.DataFrame
"""
if isinstance(xrefs, str):
xrefs = [xrefs]

if db in VFB_DBS_2_SYMBOLS.keys():
db = VFB_DBS_2_SYMBOLS[db]

return self.neo_query_wrapper.get_terms_by_xref(xrefs, db=db, summary=summary, return_dataframe=False)

def xref_2_vfb_id(self, acc=None, db='', id_type='', reverse_return=False, return_just_ids=True, verbose=False):
Expand Down Expand Up @@ -843,7 +853,10 @@ def xref_2_vfb_id(self, acc=None, db='', id_type='', reverse_return=False, retur
else:
new_acc.append(xref.split(':')[-1])
acc = new_acc
if db in VFB_DBS_2_SYMBOLS.keys():
db = VFB_DBS_2_SYMBOLS[db]
result = self.neo_query_wrapper.xref_2_vfb_id(acc=acc, db=db, id_type=id_type, reverse_return=reverse_return, verbose=verbose)
print(result) if verbose else None
if return_just_ids & reverse_return:
return [x.key for x in result]
if return_just_ids and not reverse_return:
Expand Down Expand Up @@ -899,8 +912,7 @@ def get_TermInfo(self, short_forms: iter, summary=True, cache=True, return_dataf
print(short_forms) if verbose else None
return self.neo_query_wrapper.get_TermInfo(short_forms, summary=summary, cache=cache, return_dataframe=False, limit=limit, verbose=verbose)

@batch_query
def vfb_id_2_xrefs(self, vfb_id: iter, db='', id_type='', reverse_return=False):
def vfb_id_2_xrefs(self, vfb_id, db='', id_type='', reverse_return=False, verbose=False, datasource_only=True):
"""Map a list of short_form IDs in VFB to external DB IDs
:param vfb_id: An iterable (e.g. a list) of VFB short_form IDs.
Expand All @@ -912,7 +924,31 @@ def vfb_id_2_xrefs(self, vfb_id: iter, db='', id_type='', reverse_return=False):
Return if `reverse_return` is `True`:
dict { '<db>:<acc>' : <VFB_id> }
"""
return self.neo_query_wrapper.vfb_id_2_xrefs(vfb_id=vfb_id, db=db, id_type=id_type, reverse_return=reverse_return)
if isinstance(vfb_id, str):
vfb_id = [vfb_id]
if db in VFB_DBS_2_SYMBOLS.keys():
db = VFB_DBS_2_SYMBOLS[db]
print(f"vfb_id_2_xrefs: {vfb_id}, {db}, {id_type}, {reverse_return}") if verbose else None
result = self.neo_query_wrapper.vfb_id_2_xrefs(vfb_id=vfb_id, db=db, id_type=id_type, reverse_return=False, verbose=verbose, datasource_only=datasource_only)
print(f"Returned: {result}") if verbose else None
rl = {}
if reverse_return:
for id in vfb_id:
if id not in result.keys():
print(f"No match found for {id}")
else:
for r in result[id]:
rl[":".join([r['db'], r['acc']])] = id
else:
for id in vfb_id:
if id not in result.keys():
print(f"No match found for {id}")
else:
rl[id] = []
for r in result[id]:
rl[id].append(":".join([r['db'], r['acc']]))
print(rl) if verbose else None
return rl

def get_dbs(self, include_symbols=True, data_sources_only=True, verbose=False):
"""Get all external databases in the database, optionally filtering by data sources and including symbols.
Expand Down
9 changes: 7 additions & 2 deletions src/vfb_connect/neo/query_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ def get_templates(self, summary=True, return_dataframe=True, include_symbols=Fal
short_forms.extend([d['s'] for d in dc if d['s']])
return self.get_anatomical_individual_TermInfo(short_forms, summary=summary, return_dataframe=return_dataframe)

def vfb_id_2_xrefs(self, vfb_id: iter, db='', id_type='', reverse_return=False):
def vfb_id_2_xrefs(self, vfb_id: iter, db='', id_type='', reverse_return=False, verbose=False, datasource_only=False):
"""Map a list of short_form IDs in VFB to external DB IDs
:param vfb_id: An iterable (e.g. a list) of VFB short_form IDs.
Expand All @@ -422,14 +422,19 @@ def vfb_id_2_xrefs(self, vfb_id: iter, db='', id_type='', reverse_return=False):
clause2 = ''
if id_type:
clause2 = "AND r.id_type = '%s'" % id_type
if datasource_only:
clause2 = "AND s.is_data_source = [True]"
ret = "RETURN i.short_form as key, " \
"collect({ db: s.short_form, acc: r.accession[0]}) as mapping"
if reverse_return:
ret = "RETURN r.accession[0] as key, " \
"collect({ db: s.short_form, vfb_id: i.short_form }) as mapping"
"collect({ db: CASE WHEN s.symbol IS NOT NULL AND size(s.symbol) > 0 AND NOT s.symbol[0] = '' THEN s.symbol[0] ELSE s.short_form END, vfb_id: i.short_form }) as mapping"
q = ' '.join([match, clause1, clause2, ret])
print(q) if verbose else None
dc = self._query(q)
print(dc) if verbose else None
mapping = {d['key']: d['mapping'] for d in dc}
print(mapping) if verbose else None
unmapped = set(vfb_id)-set(mapping.keys())
if unmapped:
print("33mWarning:\033[0m The following IDs do not match DB &/or id_type constraints: %s" % str(unmapped))
Expand Down
1 change: 1 addition & 0 deletions src/vfb_connect/schema/test/vfb_term_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,7 @@ def test_vfbterm_xref(self):
self.assertTrue(term.xref_id)
print(dir(term))
print(term.xref_id)
print(self.vfb.xref_2_vfb_id(term.xref_id, return_just_ids=True, verbose=True))
self.assertEqual(self.vfb.xref_2_vfb_id(term.xref_id, return_just_ids=True)[0], term.id)

if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions src/vfb_connect/schema/vfb_term.py
Original file line number Diff line number Diff line change
Expand Up @@ -1667,6 +1667,7 @@ def __init__(self, id=None, term: Optional[Term] = None, related_terms: Optional
if xref.is_data_source:
self.data_source = xref.site_name
self.xref_id = xref.id
self.xref_accession = xref.accession if hasattr(xref, 'accession') else None
self.xref_url = xref.link if hasattr(xref, 'link') and xref.link else xref.homepage
self.xref_name = xref.name

Expand Down
12 changes: 12 additions & 0 deletions src/vfb_connect/test/cross_server_tools_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,18 @@ def test_nt_receptors_in_downstream_neurons(self):
print(bar)
self.assertTrue(len(bar) > 9)

def test_xref_to_id(self):
    """Check that an external xref string resolves to the expected VFB ID.

    Passes a single 'FlyEM-HB:<accession>' string to `xref_2_vfb_id` and
    expects a one-element list holding the matching VFB short_form.
    """
    fu = self.vc.xref_2_vfb_id('FlyEM-HB:1353544607')
    print(fu)
    # assertEqual reports both values on failure; assertTrue(a == b) only
    # says "False is not true". It also covers the empty-result case.
    self.assertEqual(fu, ['VFB_jrchk3bp'])

def test_id_to_xref(self):
    """Check that a VFB ID can be mapped back to external cross-references.

    Passes a single VFB short_form to `vfb_id_2_xrefs` and expects a
    non-empty result that contains the queried ID as a key.
    """
    fu = self.vc.vfb_id_2_xrefs('VFB_jrchk3bp', verbose=True)
    self.assertTrue(fu)
    print(fu)
    # A dict_keys view never compares equal to a list, so the previous
    # assertNotEqual(fu.keys(), [...]) could not fail. Check membership of
    # the queried ID instead.
    self.assertIn('VFB_jrchk3bp', fu)

def test_get_neuron_pubs(self):
fu = self.vc.get_neuron_pubs('Kenyon cell')
self.assertTrue(len(fu)> 9)
Expand Down

0 comments on commit 49c1b3c

Please sign in to comment.