From b4f2d314198ee9651f64f139ea1e8adf3e4250e1 Mon Sep 17 00:00:00 2001 From: Yasemin Bridges Date: Wed, 13 Nov 2024 13:51:49 +0000 Subject: [PATCH 1/2] allow specification of gene identifier --- src/phenotype2phenopacket/add/add_genes.py | 9 ++++++--- src/phenotype2phenopacket/cli_add.py | 10 ++++++++++ .../utils/phenopacket_utils.py | 4 ++-- tests/test_phenopacket_utils.py | 14 +++++++------- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/src/phenotype2phenopacket/add/add_genes.py b/src/phenotype2phenopacket/add/add_genes.py index 2acf443..bbe14ba 100644 --- a/src/phenotype2phenopacket/add/add_genes.py +++ b/src/phenotype2phenopacket/add/add_genes.py @@ -1,7 +1,6 @@ from pathlib import Path import polars as pl -from phenopackets import Disease from pheval.utils.file_utils import all_files from pheval.utils.phenopacket_utils import ( GeneIdentifierUpdater, @@ -10,6 +9,7 @@ phenopacket_reader, ) +from phenopackets import Disease from phenotype2phenopacket.utils.phenopacket_utils import ( PhenopacketInterpretationExtender, PhenopacketUtil, @@ -68,19 +68,22 @@ def add_genes( ) -def add_genes_to_directory(phenopacket_dir: Path, genes_to_disease: pl.DataFrame, output_dir: Path): +def add_genes_to_directory( + phenopacket_dir: Path, genes_to_disease: pl.DataFrame, gene_identifier: str, output_dir: Path +): """ Add known gene-to-phenotype relationships to the interpretations of a directory of phenopackets. Args: phenopacket_dir (Path): Directory containing the phenopacket files. genes_to_disease (pl.DataFrame): DataFrame containing genes_to_disease.txt entries. + gene_identifier (str): Gene identifier for the phenopacket. output_dir (Path): Directory to store the updated phenopackets. """ hgnc_dict = create_hgnc_dict() identifier_map = create_gene_identifier_map() gene_identifier_updater = GeneIdentifierUpdater( - gene_identifier="ensembl_id", hgnc_data=hgnc_dict, identifier_map=identifier_map + gene_identifier=gene_identifier, hgnc_data=hgnc_dict, identifier_map=identifier_map ) for phenopacket_path in all_files(phenopacket_dir): add_genes(phenopacket_path, genes_to_disease, gene_identifier_updater, output_dir) diff --git a/src/phenotype2phenopacket/cli_add.py b/src/phenotype2phenopacket/cli_add.py index af642ca..7fc1fbf 100644 --- a/src/phenotype2phenopacket/cli_add.py +++ b/src/phenotype2phenopacket/cli_add.py @@ -28,9 +28,17 @@ help="Path to output directory.", type=Path, ) +@click.option( + "--gene-identifier", + "-i", + required=False, + help="Gene identifier to update in phenopacket", + type=click.Choice(["ensembl_id", "entrez_id", "hgnc_id"]), +) def add_genes_command( phenopacket_dir: Path, genes_to_disease: Path, + gene_identifier: str, output_dir: Path, ): """ @@ -39,6 +47,7 @@ def add_genes_command( Args: phenopacket_dir (Path): Directory containing the phenopacket files. genes_to_disease (Path): Path to the genes_to_disease.txt file. + gene_identifier (str): Gene identifier to add in phenopacket output_dir (Path): Directory to store the updated phenopackets. """ output_dir.mkdir(exist_ok=True) @@ -46,5 +55,6 @@ def add_genes_command( add_genes_to_directory( phenopacket_dir, genes_to_disease_df, + gene_identifier, output_dir, ) diff --git a/src/phenotype2phenopacket/utils/phenopacket_utils.py b/src/phenotype2phenopacket/utils/phenopacket_utils.py index 540e258..147b0b4 100644 --- a/src/phenotype2phenopacket/utils/phenopacket_utils.py +++ b/src/phenotype2phenopacket/utils/phenopacket_utils.py @@ -11,6 +11,8 @@ import polars as pl from google.protobuf.timestamp_pb2 import Timestamp from oaklib.implementations import ProntoImplementation +from pheval.utils.phenopacket_utils import GeneIdentifierUpdater, create_json_message + from phenopackets import ( Age, Diagnosis, @@ -26,8 +28,6 @@ Resource, TimeElement, ) -from pheval.utils.phenopacket_utils import GeneIdentifierUpdater, create_json_message - from phenotype2phenopacket.utils.utils import is_float diff --git a/tests/test_phenopacket_utils.py b/tests/test_phenopacket_utils.py index 9a2059c..7534ae1 100644 --- a/tests/test_phenopacket_utils.py +++ b/tests/test_phenopacket_utils.py @@ -3,6 +3,13 @@ from unittest.mock import Mock, patch import polars as pl +from pheval.utils.phenopacket_utils import ( + GeneIdentifierUpdater, + create_gene_identifier_map, + create_hgnc_dict, +) +from polars.testing import assert_frame_equal + from phenopackets import ( Age, Diagnosis, @@ -19,13 +26,6 @@ Resource, TimeElement, ) -from pheval.utils.phenopacket_utils import ( - GeneIdentifierUpdater, - create_gene_identifier_map, - create_hgnc_dict, -) -from polars.testing import assert_frame_equal - from phenotype2phenopacket.utils.phenopacket_utils import ( OnsetTerm, PhenopacketInterpretationExtender, From 1e96ca3e0b8f4de35d8e5e6f002dd6ba91d16cdb Mon Sep 17 00:00:00 2001 From: Yasemin Bridges Date: Wed, 13 Nov 2024 13:52:04 +0000 Subject: [PATCH 2/2] bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 873201e..ad4e214 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "phenotype2phenopacket" -version = "0.6.3" +version = "0.6.4" description = "" authors = ["Yasemin Bridges "] readme = "README.md"