Skip to content

Commit

Permalink
Merge pull request #105 from VirtualFlyBrain/gene_functions
Browse files Browse the repository at this point in the history
transcriptomic query functions
  • Loading branch information
Clare72 authored Sep 15, 2023
2 parents a83b3d8 + 1a3df3f commit d0cfe81
Showing 1 changed file with 65 additions and 2 deletions.
67 changes: 65 additions & 2 deletions src/vfb_connect/cross_server_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,10 +353,73 @@ def get_images_by_type(self, class_expression, template, image_folder,
image_type=image_type,
stomp=stomp)

def get_gene_function_filters(self):
    """Get list of all gene function labels.

    Runs a Cypher query over all ``Gene`` nodes, removing the generic
    labels ('Class', 'Entity', 'hasScRNAseq', 'Feature', 'Gene') from each
    node's label set, and collects whatever labels remain.

    :return: List of unique gene function labels in alphabetical order.
    :rtype: list
    """
    query = ("MATCH (g:Gene) RETURN DISTINCT apoc.coll.subtract(labels(g), "
             "['Class', 'Entity', 'hasScRNAseq', 'Feature', 'Gene']) AS gene_labels")
    result = self.neo_query_wrapper._query(query)
    # Each row holds a list of labels; flatten the rows and de-duplicate in one pass.
    unique_labels = {label for row in result for label in row['gene_labels']}
    return sorted(unique_labels)

def get_transcriptomic_profile(self, cell_type, gene_type=False, return_dataframe=True):
    """Get gene expression data for a given cell_type.

    Returns gene expression data for clusters of cells annotated as cell_type (or subtypes).
    Can optionally restrict to a gene_type - these can be retrieved by running
    get_gene_function_filters().

    No special value is returned when nothing matches: the query result is passed
    through unchanged (presumably an empty DataFrame / empty record list - confirm
    against dict_cursor).

    :param cell_type: The ID, name or symbol of a class in the Drosophila Anatomy Ontology (FBbt).
        IDs are accepted with either ':' or '_' as the separator.
    :param gene_type: Optional. A gene function label - these can be retrieved by running
        get_gene_function_filters(). Any falsy value (the default) disables the filter.
    :param return_dataframe: Optional. If True (default), build a pandas DataFrame from the
        query records; otherwise return the output of dict_cursor directly.
    :return: Gene expression data for clusters of cells annotated as cell_type (or subtypes).
    :rtype: DataFrame (or the dict_cursor output when return_dataframe is False)
    :raises KeyError: If cell_type cannot be resolved to an FBbt term, or if gene_type is
        not one of the labels returned by get_gene_function_filters().
    """
    invalid_cell_type = ("cell_type must be a valid ID, label or symbol "
                         "from the Drosophila Anatomy Ontology")

    try:
        # First treat cell_type as a name/symbol known to the lookup table.
        cell_type_short_form = self.lookup[cell_type].replace(':', '_')
    except KeyError:
        # Otherwise accept it only if it is itself one of the IDs in the lookup.
        if cell_type.replace('_', ':') not in self.lookup.values():
            # 'from None' hides the internal lookup failure from the caller's traceback.
            raise KeyError(invalid_cell_type) from None
        cell_type_short_form = cell_type.replace(':', '_')

    if not cell_type_short_form.startswith('FBbt'):
        raise KeyError(invalid_cell_type)

    if gene_type:
        # Validating against the known labels also keeps arbitrary text out of the query.
        if gene_type not in self.get_gene_function_filters():
            raise KeyError("gene_type must be a valid gene function label, try running get_gene_function_filters()")
        gene_label = ':' + gene_type
    else:
        gene_label = ''

    # Both interpolated values were checked above (lookup table / label list).
    query = ("MATCH (g:Gene:Class%s)<-[e:expresses]-(clus:Cluster:Individual)-"
             "[:composed_primarily_of]->(c2:Class)-[:SUBCLASSOF*0..]->(c1:Neuron:Class) "
             "WHERE c1.short_form = '%s' "
             "MATCH (clus)-[:part_of]->()-[:has_part]->(sa:Sample:Individual) "
             "OPTIONAL MATCH (sa)-[:part_of]->(sex:Class) "
             "WHERE sex.short_form IN ['FBbt_00007011', 'FBbt_00007004'] "
             "OPTIONAL MATCH (sa)-[:overlaps]->(tis:Class:Anatomy) "
             "OPTIONAL MATCH (clus)-[:has_source]->(ds:DataSet:Individual) "
             "OPTIONAL MATCH (ds)-[:has_reference]->(p:pub:Individual) "
             "OPTIONAL MATCH (ds)-[dbx:database_cross_reference]->(s:Site:Individual) "
             "RETURN DISTINCT c2.label AS cell_type, c2.short_form AS cell_type_id, "
             "sex.label AS sample_sex, COLLECT(tis.label) AS sample_tissue, "
             "p.miniref[0] as ref, g.label AS gene, g.short_form AS gene_id, "
             "apoc.coll.subtract(labels(g), ['Class', 'Entity', 'hasScRNAseq', 'Feature', 'Gene']) AS function, "
             "e.expression_extent[0] as extent, toFloat(e.expression_level[0]) as level order by cell_type, g.label"
             % (gene_label, cell_type_short_form))
    r = self.nc.commit_list([query])
    dc = dict_cursor(r)
    if return_dataframe:
        return pd.DataFrame.from_records(dc)
    return dc

0 comments on commit d0cfe81

Please sign in to comment.