# From a diff against src/vfb_connect/cross_server_tools.py: the two methods
# below are added directly after get_images_by_type(). The enclosing class
# header is outside this excerpt, so they appear here at top level with an
# explicit ``self``.


def get_gene_function_filters(self):
    """Get list of all gene function labels.

    Runs a single query over all ``Gene`` nodes, asking for each node's
    labels minus the fixed set excluded in the query ('Class', 'Entity',
    'hasScRNAseq', 'Feature', 'Gene'), then merges the per-row label lists.

    :return: List of unique gene function labels, sorted.
    :rtype: list
    """
    query = ("MATCH (g:Gene) RETURN DISTINCT apoc.coll.subtract(labels(g), "
             "['Class', 'Entity', 'hasScRNAseq', 'Feature', 'Gene']) AS gene_labels")
    result = self.neo_query_wrapper._query(query)
    # Every row holds a *list* of labels, so flatten and de-duplicate in one pass.
    unique_labels = {label for row in result for label in row['gene_labels']}
    return sorted(unique_labels)


def get_transcriptomic_profile(self, cell_type, gene_type=False, return_dataframe=True):
    """Get gene expression data for a given cell_type.

    Returns gene expression data for clusters of cells annotated as cell_type (or subtypes).
    Can optionally restrict to a gene_type - these can be retrieved by running get_gene_function_filters.

    :param cell_type: The ID, name or symbol of a class in the Drosophila Anatomy Ontology (FBbt).
    :param gene_type: Optional. A gene function label - these can be retrieved by running
        get_gene_function_filters(). Any falsy value (the default) disables the restriction.
    :param return_dataframe: Optional. If True (default), the records are wrapped in a pandas
        DataFrame; otherwise the output of dict_cursor is returned unchanged.
    :return: Gene expression data for clusters of cells annotated as cell_type (or subtypes).
    :rtype: DataFrame if return_dataframe is True, else whatever dict_cursor returns
        (presumably a list of dicts - confirm against dict_cursor).
    :raises KeyError: If cell_type cannot be resolved to an FBbt term via self.lookup, or if
        gene_type is given but is not one of the labels from get_gene_function_filters().
    """
    invalid_cell_type = "cell_type must be a valid ID, label or symbol from the Drosophila Anatomy Ontology"

    try:
        # self.lookup maps a label/symbol to an ID; the query needs the '_' form.
        cell_type_short_form = self.lookup[cell_type].replace(':', '_')
    except KeyError:
        # Not a known key: accept it only if it is itself one of the known IDs
        # (given in either the ':' or the '_' form).
        if cell_type.replace('_', ':') not in self.lookup.values():
            raise KeyError(invalid_cell_type)
        cell_type_short_form = cell_type.replace(':', '_')
    if not cell_type_short_form.startswith('FBbt'):
        raise KeyError(invalid_cell_type)

    gene_label = ''
    if gene_type:
        if gene_type not in self.get_gene_function_filters():
            raise KeyError("gene_type must be a valid gene function label, try running get_gene_function_filters()")
        # Only spliced into the query after being matched against labels read
        # back from the database, so arbitrary caller text never reaches it.
        gene_label = ':' + gene_type

    query = ("MATCH (g:Gene:Class%s)<-[e:expresses]-(clus:Cluster:Individual)-"
             "[:composed_primarily_of]->(c2:Class)-[:SUBCLASSOF*0..]->(c1:Neuron:Class) "
             "WHERE c1.short_form = '%s' "
             "MATCH (clus)-[:part_of]->()-[:has_part]->(sa:Sample:Individual) "
             "OPTIONAL MATCH (sa)-[:part_of]->(sex:Class) "
             "WHERE sex.short_form IN ['FBbt_00007011', 'FBbt_00007004'] "
             "OPTIONAL MATCH (sa)-[:overlaps]->(tis:Class:Anatomy) "
             "OPTIONAL MATCH (clus)-[:has_source]->(ds:DataSet:Individual) "
             "OPTIONAL MATCH (ds)-[:has_reference]->(p:pub:Individual) "
             "OPTIONAL MATCH (ds)-[dbx:database_cross_reference]->(s:Site:Individual) "
             "RETURN DISTINCT c2.label AS cell_type, c2.short_form AS cell_type_id, "
             "sex.label AS sample_sex, COLLECT(tis.label) AS sample_tissue, "
             "p.miniref[0] as ref, g.label AS gene, g.short_form AS gene_id, "
             "apoc.coll.subtract(labels(g), ['Class', 'Entity', 'hasScRNAseq', 'Feature', 'Gene']) AS function, "
             "e.expression_extent[0] as extent, toFloat(e.expression_level[0]) as level order by cell_type, g.label"
             % (gene_label, cell_type_short_form))
    r = self.nc.commit_list([query])
    dc = dict_cursor(r)
    # NOTE(review): an earlier docstring promised ``False`` when no data is
    # found, but nothing here returns False - the result for an empty query
    # is whatever dict_cursor / DataFrame.from_records produce. Confirm the
    # intended contract before relying on a falsy check.
    if return_dataframe:
        return pd.DataFrame.from_records(dc)
    return dc