diff --git a/README.md b/README.md index f8a3325..65954eb 100644 --- a/README.md +++ b/README.md @@ -23,8 +23,11 @@ from panelapp import Panelapp panel = Panelapp.Panel(269) # Create panel object, confidence level defaults to 3, version is the lastest panel.get_info() # Return dict with general data about the panel -panel.get_genes() # Return genes according to confidence level choosen when creating the panel object -panel.get_genes(3, 2, 1) # Return genes with confidence levels 3, 2, 1 (green, amber, red) +# For the following 4 methods you can specify the confidence levels, e.g. panel.get_gene_symbols(3, 2, 1) +panel.get_genes() # Return all the data (symbol, hgnc id, ensembl ids if available) for each gene +panel.get_gene_symbols() # Return gene symbols according to confidence level chosen when creating the panel object +panel.get_hgnc_ids() # Return hgnc ids according to confidence level chosen when creating the panel object +panel.get_ensembl_ids("GRCh37") # Return GRCh37 ensembl ids according to confidence level chosen when creating the panel object panel.update_version("3.2", "2") # Update the panel with version and confidence level given panel.is_signedoff() # Return date of signedoff or False if not signedoff panel.get_data() # Return all the data the API sent, you can use that there's something that is lacking in my methods diff --git a/panelapp/Panelapp.py b/panelapp/Panelapp.py index b5974bd..8962fca 100644 --- a/panelapp/Panelapp.py +++ b/panelapp/Panelapp.py @@ -187,40 +187,46 @@ def set_genes(self): if self.data["genes"]: for gene in self.data["genes"]: if gene["confidence_level"] == "3": - self.genes.setdefault("3", []).append(( - gene["gene_data"]["hgnc_symbol"], - gene["gene_data"]["hgnc_id"] - )) + self.genes.setdefault("3", []).append( + setup_gene(gene) + ) elif gene["confidence_level"] == "2": - self.genes.setdefault("2", []).append(( - gene["gene_data"]["hgnc_symbol"], - gene["gene_data"]["hgnc_id"] - )) + self.genes.setdefault("2", []).append( + setup_gene(gene) + ) elif gene["confidence_level"] == "1": - 
self.genes.setdefault("1", []).append(( - gene["gene_data"]["hgnc_symbol"], - gene["gene_data"]["hgnc_id"] - )) + self.genes.setdefault("1", []).append( + setup_gene(gene) + ) elif gene["confidence_level"] == "0": - self.genes.setdefault("0", []).append(( - gene["gene_data"]["hgnc_symbol"], - gene["gene_data"]["hgnc_id"] - )) + self.genes.setdefault("0", []).append( + setup_gene(gene) + ) - def get_genes(self, *confidence_levels: str): - """ Return list of genes - Can type 1,2,3 to get genes with appropriate confidence level to return + def select_from_genes(self, key, *confidence_levels): + """ Select the data to return from the self.genes dict + + Args: + key (str): Key of data to return ("symbol", "hgnc_id", "ensembl_id"), or None to return the whole gene dicts Returns: - list: List of genes + list: List of symbols or ids to return """ + genes_to_return = [] if confidence_levels: genes = [] for level in confidence_levels: if str(level) in self.genes: - genes.append(self.genes[str(level)]) + if key is not None: + genes.append([ + gene[key] + for gene in self.genes[str(level)] + if key in gene + ]) + else: + genes.append(self.genes[str(level)]) genes_to_return = [ gene @@ -229,12 +235,73 @@ def get_genes(self, *confidence_levels: str): ] else: if self.confidence_level in self.genes: - genes_to_return = self.genes[self.confidence_level] + if key is not None: + genes_to_return = [ + gene[key] + for gene in self.genes[str(self.confidence_level)] + if key in gene + ] + else: + genes_to_return = self.genes[str(self.confidence_level)] else: return [] return genes_to_return + def get_genes(self, *confidence_levels: str): + """ Return gene symbols and gene ids + + Returns: + list: List of dicts with all the data for the genes + """ + + return self.select_from_genes(None, *confidence_levels) + + def get_gene_symbols(self, *confidence_levels: str): + """ Return list of gene symbols + Pass 0, 1, 2, 3 to select the confidence levels of the genes to return + + Returns: + list: List of gene symbols + """ + + 
return self.select_from_genes("symbol", *confidence_levels) + + def get_hgnc_ids(self, *confidence_levels: str): + """ Return list of hgnc_ids + Pass 0, 1, 2, 3 to select the confidence levels of the genes to return + + Returns: + list: List of hgnc ids + """ + + return self.select_from_genes("hgnc_id", *confidence_levels) + + def get_ensembl_ids(self, build, *confidence_levels: str): + """ Return list of ensembl ids for the given build, or the dicts keyed by build if build is None + Pass 0, 1, 2, 3 to select the confidence levels of the genes to return + + Args: + build (str): Genome build, "GRCh37" or "GRCh38" (None keeps both builds) + + Returns: + list: List of ensembl ids + """ + + ensembl_ids = self.select_from_genes( + "ensembl_id", *confidence_levels + ) + + if build is not None: + ensembl_ids_to_return = [ + ensembl_id[build] + for ensembl_id in ensembl_ids + ] + else: + ensembl_ids_to_return = ensembl_ids + + return ensembl_ids_to_return + def set_cnvs(self): """ Setup the cnvs """ @@ -286,7 +353,7 @@ def get_info(self): """ info = { - "green_genes": len(self.genes["3"]), + "green_genes": len(self.symbols["3"]), "entity_types": self.data["stats"] } @@ -376,3 +443,37 @@ def __str__(self): self.signedoff, self.superpanel ) + + +def setup_gene(gene_data): + """ Create dict of data to be added in self.genes. 
Will contain symbol, + hgnc id, ensembl id (if provided by Panelapp) + + Args: + gene_data (dict): Dict of all the gene data in Panelapp for a given gene + + Returns: + dict: Dict containing gene symbol, hgnc id, ensembl id if in Panelapp + """ + data = { + "symbol": gene_data["gene_data"]["hgnc_symbol"], + "hgnc_id": gene_data["gene_data"]["hgnc_id"], + } + + if "ensembl_genes" in gene_data["gene_data"]: + if gene_data["gene_data"]["ensembl_genes"]: + # under the following keys + # ["gene_data"]["ensembl_genes"]["GRch37"], there is + # another key denoting the ensembl version; ids of all versions are joined + data["ensembl_id"] = { + "GRCh37": "".join([ + data["ensembl_id"] + for version, data in gene_data["gene_data"]["ensembl_genes"]["GRch37"].items() + ]), + "GRCh38": "".join([ + data["ensembl_id"] + for version, data in gene_data["gene_data"]["ensembl_genes"]["GRch38"].items() + ]) + } + + return data diff --git a/panelapp/queries.py b/panelapp/queries.py index 05ed91c..b7ae3a3 100644 --- a/panelapp/queries.py +++ b/panelapp/queries.py @@ -44,8 +44,8 @@ def compare_versions(original_panel: Panel, compare_version: str): confidence_level=original_panel.confidence_level ) - original_genes = original_panel.get_genes(1, 2, 3) - compare_genes = new_panel.get_genes(1, 2, 3) + original_genes = original_panel.get_hgnc_ids(1, 2, 3) + compare_genes = new_panel.get_hgnc_ids(1, 2, 3) matches = set(original_genes).intersection(set(compare_genes)) difference = set(original_genes).symmetric_difference(set(compare_genes)) diff --git a/setup.py b/setup.py index 04f9fd6..4cd67e7 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="panelapp", - version="0.6.0", + version="0.7.0", author="Yujin Kim", author_email="yujin.kim@hotmail.fr", description="General purpose Panelapp package",