From d0ebf52e626984f770e8a10bc8523ff3c8e31f93 Mon Sep 17 00:00:00 2001 From: Clare72 Date: Tue, 5 Sep 2023 10:12:57 +0100 Subject: [PATCH 1/6] fix lookup --- src/vfb_connect/neo/neo4j_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vfb_connect/neo/neo4j_tools.py b/src/vfb_connect/neo/neo4j_tools.py index 4066aa16..4a237cc6 100644 --- a/src/vfb_connect/neo/neo4j_tools.py +++ b/src/vfb_connect/neo/neo4j_tools.py @@ -237,7 +237,7 @@ def get_lookup(self, limit_by_prefix=None, include_individuals=False, q = self.commit_list([property_lookup_query]) out.extend(dict_cursor(q)) lookup = {x['name']: x['id'].replace('_', ':') for x in out} - lookup.update({x['id']: x['id'].replace('_', ':') for x in out}) + lookup.update({x['name']: x['id'].replace('_', ':') for x in out}) # print(lookup['neuron']) return lookup From 4dffae3697d84db219c5e533041cc6365a7b764a Mon Sep 17 00:00:00 2001 From: Clare72 Date: Tue, 5 Sep 2023 10:13:32 +0100 Subject: [PATCH 2/6] gene label lookup function --- src/vfb_connect/cross_server_tools.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/vfb_connect/cross_server_tools.py b/src/vfb_connect/cross_server_tools.py index 7fd8a82b..50c7223b 100644 --- a/src/vfb_connect/cross_server_tools.py +++ b/src/vfb_connect/cross_server_tools.py @@ -353,6 +353,20 @@ def get_images_by_type(self, class_expression, template, image_folder, image_type=image_type, stomp=stomp) + def get_gene_function_filters(self): + """Get list of all gene function labels. + + :return: List of unique gene function labels in alphabetical order. + :rtype: list + """ + query = ("MATCH (g:Gene) RETURN DISTINCT apoc.coll.subtract(labels(g), " + "['Class', 'Entity', 'hasScRNAseq', 'Feature', 'Gene']) AS gene_labels") + result = self.neo_query_wrapper._query(query) + labels = [] + for r in result: + labels.extend(r['gene_labels']) + labels = sorted(list(set(labels))) + return labels From 94551daf93c150581835affa0dbec0be200eff55 Mon Sep 17 00:00:00 2001 From: Clare72 Date: Tue, 5 Sep 2023 10:13:52 +0100 Subject: [PATCH 3/6] transcriptomic profile function --- src/vfb_connect/cross_server_tools.py | 45 +++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/vfb_connect/cross_server_tools.py b/src/vfb_connect/cross_server_tools.py index 50c7223b..bd59edf4 100644 --- a/src/vfb_connect/cross_server_tools.py +++ b/src/vfb_connect/cross_server_tools.py @@ -368,9 +368,54 @@ def get_gene_function_filters(self): labels = sorted(list(set(labels))) return labels + def get_transcriptomic_profile(self, cell_type, gene_type=False): + """Get gene expression data for a given cell_type. + Returns a DataFrame of gene expression data for clusters of cells annotated as cell_type (or subtypes). + Can optionally restrict to a gene_type - these can be retrieved by running get_gene_function_filters. + If no data is found, returns False. + :param cell_type: The ID, name or symbol of a class in the Drosophila Anatomy Ontology (FBbt). + :param gene_type: Optional. A gene function label - these can be retrieved by running get_gene_function_filters(). + :return: DataFrame with gene expression data for clusters of cells annotated as cell_type (or subtypes). + :rtype: DataFrame + """ + try: + cell_type_short_form = self.lookup[cell_type].replace(':', '_') + except KeyError: + if cell_type.replace('_', ':') in self.lookup.values(): + cell_type_short_form = cell_type.replace(':', '_') + else: + raise KeyError("cell_type must be a valid ID, label or symbol from the Drosophila Anatomy Ontology") + if not cell_type_short_form.startswith('FBbt'): + raise KeyError("cell_type must be a valid ID, label or symbol from the Drosophila Anatomy Ontology") + if gene_type: + if gene_type not in self.get_gene_function_filters(): + raise KeyError("gene_type must be a valid gene function label, try running get_gene_function_filters()") + else: + gene_label = ':' + gene_type + else: + gene_label = '' + + query = ("MATCH (g:Gene%s)<-[e:expresses]-(clus:Cluster)-" + "[:composed_primarily_of]->(c2)-[:SUBCLASSOF*0..]->(c1:Neuron) " + "WHERE c1.short_form = '%s' " + "OPTIONAL MATCH (clus)-[:has_source]->(ds:DataSet) OPTIONAL MATCH (ds)-[:has_reference]->(p:pub) " + "OPTIONAL MATCH (ds)-[dbx:database_cross_reference]->(s:Site) " + "RETURN c2.label AS cell_type, c2.short_form AS cell_type_id, " + "p.miniref[0] as ref, g.label AS gene, g.short_form AS gene_id, " + "apoc.coll.subtract(labels(g), ['Class', 'Entity', 'hasScRNAseq', 'Feature', 'Gene']) AS function, " + "e.expression_extent[0] as extent, e.expression_level[0] as level order by cell_type, g.label" + % (gene_label, cell_type_short_form)) + result = self.neo_query_wrapper._query(query) + if result: + result_df = pd.DataFrame.from_dict(data=result, orient='columns') + result_df['level'] = result_df['level'].astype(float) + return result_df + else: + print('No transcriptomics data for %s' % cell_type) + return False From bfda620275791d57cd7381bba11c612060912de6 Mon Sep 17 00:00:00 2001 From: Clare72 Date: Thu, 14 Sep 2023 11:33:50 +0100 Subject: [PATCH 4/6] Revert "fix lookup" This reverts commit d0ebf52e626984f770e8a10bc8523ff3c8e31f93. --- src/vfb_connect/neo/neo4j_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vfb_connect/neo/neo4j_tools.py b/src/vfb_connect/neo/neo4j_tools.py index 4a237cc6..4066aa16 100644 --- a/src/vfb_connect/neo/neo4j_tools.py +++ b/src/vfb_connect/neo/neo4j_tools.py @@ -237,7 +237,7 @@ def get_lookup(self, limit_by_prefix=None, include_individuals=False, q = self.commit_list([property_lookup_query]) out.extend(dict_cursor(q)) lookup = {x['name']: x['id'].replace('_', ':') for x in out} - lookup.update({x['name']: x['id'].replace('_', ':') for x in out}) + lookup.update({x['id']: x['id'].replace('_', ':') for x in out}) # print(lookup['neuron']) return lookup From a4a957177c1b8b3fdb3f2df123a9ac90eba506ae Mon Sep 17 00:00:00 2001 From: Clare72 Date: Thu, 14 Sep 2023 14:04:28 +0100 Subject: [PATCH 5/6] extending query to get sex and tissues --- src/vfb_connect/cross_server_tools.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/vfb_connect/cross_server_tools.py b/src/vfb_connect/cross_server_tools.py index bd59edf4..333b3a7b 100644 --- a/src/vfb_connect/cross_server_tools.py +++ b/src/vfb_connect/cross_server_tools.py @@ -400,12 +400,18 @@ def get_transcriptomic_profile(self, cell_type, gene_type=False): else: gene_label = '' - query = ("MATCH (g:Gene%s)<-[e:expresses]-(clus:Cluster)-" - "[:composed_primarily_of]->(c2)-[:SUBCLASSOF*0..]->(c1:Neuron) " + query = ("MATCH (g:Gene:Class%s)<-[e:expresses]-(clus:Cluster:Individual)-" + "[:composed_primarily_of]->(c2:Class)-[:SUBCLASSOF*0..]->(c1:Neuron:Class) " "WHERE c1.short_form = '%s' " - "OPTIONAL MATCH (clus)-[:has_source]->(ds:DataSet) OPTIONAL MATCH (ds)-[:has_reference]->(p:pub) " - "OPTIONAL MATCH (ds)-[dbx:database_cross_reference]->(s:Site) " - "RETURN c2.label AS cell_type, c2.short_form AS cell_type_id, " + "MATCH (clus)-[:part_of]->()-[:has_part]->(sa:Sample:Individual) " + "OPTIONAL MATCH (sa)-[:part_of]->(sex:Class) " + "WHERE sex.short_form IN ['FBbt_00007011', 'FBbt_00007004'] " + "OPTIONAL MATCH (sa)-[:overlaps]->(tis:Class:Anatomy) " + "OPTIONAL MATCH (clus)-[:has_source]->(ds:DataSet:Individual) " + "OPTIONAL MATCH (ds)-[:has_reference]->(p:pub:Individual) " + "OPTIONAL MATCH (ds)-[dbx:database_cross_reference]->(s:Site:Individual) " + "RETURN DISTINCT c2.label AS cell_type, c2.short_form AS cell_type_id, " + "sex.label AS sample_sex, COLLECT(tis.label) AS sample_tissue, " "p.miniref[0] as ref, g.label AS gene, g.short_form AS gene_id, " "apoc.coll.subtract(labels(g), ['Class', 'Entity', 'hasScRNAseq', 'Feature', 'Gene']) AS function, " "e.expression_extent[0] as extent, e.expression_level[0] as level order by cell_type, g.label" From 1a3df3f6753e34f5872e52f3ee6268e88fe7bc64 Mon Sep 17 00:00:00 2001 From: Clare72 Date: Thu, 14 Sep 2023 14:28:37 +0100 Subject: [PATCH 6/6] edit return for consistency --- src/vfb_connect/cross_server_tools.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/vfb_connect/cross_server_tools.py b/src/vfb_connect/cross_server_tools.py index 333b3a7b..d9c2cf95 100644 --- a/src/vfb_connect/cross_server_tools.py +++ b/src/vfb_connect/cross_server_tools.py @@ -368,7 +368,7 @@ def get_gene_function_filters(self): labels = sorted(list(set(labels))) return labels - def get_transcriptomic_profile(self, cell_type, gene_type=False): + def get_transcriptomic_profile(self, cell_type, gene_type=False, return_dataframe=True): """Get gene expression data for a given cell_type. Returns a DataFrame of gene expression data for clusters of cells annotated as cell_type (or subtypes). @@ -414,14 +414,12 @@ def get_transcriptomic_profile(self, cell_type, gene_type=False): "sex.label AS sample_sex, COLLECT(tis.label) AS sample_tissue, " "p.miniref[0] as ref, g.label AS gene, g.short_form AS gene_id, " "apoc.coll.subtract(labels(g), ['Class', 'Entity', 'hasScRNAseq', 'Feature', 'Gene']) AS function, " - "e.expression_extent[0] as extent, e.expression_level[0] as level order by cell_type, g.label" + "e.expression_extent[0] as extent, toFloat(e.expression_level[0]) as level order by cell_type, g.label" % (gene_label, cell_type_short_form)) - result = self.neo_query_wrapper._query(query) - if result: - result_df = pd.DataFrame.from_dict(data=result, orient='columns') - result_df['level'] = result_df['level'].astype(float) - return result_df + r = self.nc.commit_list([query]) + dc = dict_cursor(r) + if return_dataframe: + return pd.DataFrame.from_records(dc) else: - print('No transcriptomics data for %s' % cell_type) - return False + return dc