From 46e7589e8ecc390ebdd925ef906e29bcb79133f8 Mon Sep 17 00:00:00 2001 From: Vedanth Date: Tue, 13 Aug 2024 10:57:06 +0530 Subject: [PATCH] RFCT remove groot_ref_db parameter in map_to_aro map_to_aro() was given a new parameter, groot_ref_db, so that the reference database used with groot can be taken into account when searching a gene in groot ARO mapping. Now, the groot_ref_db is passed in with the 'database' parameter, simplifying the API. --- argnorm/lib.py | 32 ++++++++++++-------------------- docs/api.md | 9 ++++----- docs/index.md | 16 +++++++++++----- tests/test_lib.py | 8 ++++---- 4 files changed, 31 insertions(+), 34 deletions(-) diff --git a/argnorm/lib.py b/argnorm/lib.py index 5fa0812..704e6ae 100644 --- a/argnorm/lib.py +++ b/argnorm/lib.py @@ -20,10 +20,6 @@ 'resfinder', 'resfinderfg', 'sarg', - 'groot', -] - -groot_ref_databases = [ 'groot-db', 'groot-core-db', 'groot-argannot', @@ -79,14 +75,13 @@ def get_aro_mapping_table(database): aro_mapping_table['ARO'] = aro_mapping_table['ARO'].map(lambda a: f'ARO:{a}', na_action='ignore') return aro_mapping_table -def map_to_aro(gene, database, groot_ref_db=None): +def map_to_aro(gene, database): """ Description: Gets ARO mapping for a specific gene in a database. Parameters: gene (str): The original ID of the gene as mentioned in source database. - database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinderfg, sarg, and groot - groot_ref_db (str, optional): name of reference db used by groot. Can be groot-argannot, groot-resfinder, groot-card, groot-core-db, or groot-db + database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinderfg, sarg, groot-db, groot-core-db, groot-argannot, groot-resfinder, groot-card Returns: ARO[result] (pronto.term.Term): A pronto term with the ARO number of input gene. ARO number can be accessed using 'id' attribute and gene name can be accessed using 'name' attribute. @@ -96,23 +91,20 @@ def map_to_aro(gene, database, groot_ref_db=None): if database not in DATABASES: raise Exception(f'{database} is not a supported database.') - if 'groot' in database and not groot_ref_db in groot_ref_databases: - raise Exception(f'{groot_ref_db} is not a valid groot reference database') mapping_table = get_aro_mapping_table(database) # Preprocess input gene & mapping table original ids if groot is being used - if 'groot' in database: - if groot_ref_db == 'groot-argannot': - gene = gene.split('~~~')[-1] - mapping_table.index = mapping_table.index.map(lambda x: ':'.join(str(x).split(':')[1:3])) - if groot_ref_db == 'groot-card': - gene = gene.split('.')[0] - if groot_ref_db in ['groot-db', 'groot-core-db']: - if 'card' in gene.lower(): - gene = gene.split('|')[-1] - else: - gene = gene.split('__')[1] + if database == 'groot-argannot': + gene = gene.split('~~~')[-1] + mapping_table.index = mapping_table.index.map(lambda x: ':'.join(str(x).split(':')[1:3])) + if database == 'groot-card': + gene = gene.split('.')[0] + if database in ['groot-db', 'groot-core-db']: + if 'card' in gene.lower(): + gene = gene.split('|')[-1] + else: + gene = gene.split('__')[1] try: result = mapping_table.loc[gene, 'ARO'] diff --git a/docs/api.md b/docs/api.md index ef0533d..f0d23cc 100644 --- a/docs/api.md +++ b/docs/api.md @@ -10,8 +10,7 @@ A list of supported databases. #### Parameters * gene (str): The original ID of the gene as mentioned in source database. -* database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinderfg and sarg -* groot_ref_db (str, optional): name of reference database used by groot. Can be: groot-argannot, groot-resfinder, groot-card, groot-db, or groot-core-db +* database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinderfg, sarg, groot-db, groot-core-db, groot-argannot, groot-resfinder, and groot-card #### Returns * pronto.term.Term: A pronto term with the ARO number of input gene. ARO number can be accessed using 'id' attribute and gene name can be accessed using 'name' attribute. @@ -26,14 +25,14 @@ from argnorm.lib import map_to_aro # Mapping the `ARR-2_1_HQ141279` gene from the `resfinder` database to the ARO print(map_to_aro('ARR-2_1_HQ141279', 'resfinder')) -# Mapping the `argannot~~~(Bla)cfxA4~~~AY769933:1-966` gene in `groot` using the `groot-argannot` reference database -print(map_to_aro('argannot~~~(Bla)cfxA4~~~AY769933:1-966', 'groot', 'groot-argannot')) +# Mapping the `argannot~~~(Bla)cfxA4~~~AY769933:1-966` gene in `groot` using the `groot-argannot` database +print(map_to_aro('argannot~~~(Bla)cfxA4~~~AY769933:1-966', 'groot-argannot')) ``` ### argnorm.lib.get_aro_mapping_table(): gets ARO mapping table for a specific database #### Parameters -* database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinderfg, sarg or groot +* database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinderfg, sarg, groot-db, groot-core-db, groot-argannot, groot-resfinder, and groot-card #### Returns * pandas.DataFrame: A pandas dataframe with ARGs mapped to AROs. diff --git a/docs/index.md b/docs/index.md index 88f38d7..7058399 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,7 +2,7 @@ [![Python package](https://github.com/BigDataBiology/argNorm/actions/workflows/python-package.yml/badge.svg)](https://github.com/BigDataBiology/argNorm/actions/workflows/python-package.yml) [![Downloads](https://pepy.tech/badge/argNorm)](https://pepy.tech/project/argNorm) -![](https://img.shields.io/badge/status-alpha-red?style=flat) +![](https://img.shields.io/badge/status-beta-yellow?style=flat) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/argnorm/README.html) [![Anaconda-Server Badge](https://anaconda.org/bioconda/argnorm/badges/platforms.svg)](https://anaconda.org/bioconda/argnorm) [![Anaconda-Server Badge](https://anaconda.org/bioconda/argnorm/badges/license.svg)](https://anaconda.org/bioconda/argnorm) @@ -43,6 +43,7 @@ The `resistance_to_drug_classes` column will contain ARO numbers of the broader - [ABRicate](https://github.com/tseemann/abricate) (v1.0.1) with NCBI (v3.6), ResFinder (v4.1.11), MEGARes (v2.0), ARG-ANNOT (v5), ResFinderFG (v2) - [ResFinder](https://bitbucket.org/genomicepidemiology/resfinder/src/master/) (v4.0) - [AMRFinderPlus](https://github.com/ncbi/amr) (v3.10.30) +- [GROOT](https://github.com/will-rowe/groot) (v1.1.2) ## Installation argNorm can be installed using pip: @@ -72,6 +73,7 @@ The only positional argument required is `tool` which can be: - `abricate` - `resfinder` - `amrfinderplus` +- `groot` The available options are: - `-h` or `--help`: shows available options and exits. @@ -82,6 +84,7 @@ The available options are: - DeepARG (`deeparg`) - MEGARes (`megares`) - ARG-ANNOT (`argannot`) + - `groot-core-db`, `groot-db`, `groot-resfinder`, `groot-argannot`, `groot-card` - `--hamronized`: use this if the input is hamronized by [hAMRonization](https://github.com/pha4ge/hAMRonization) - `-i` or `--input`: path to the annotation result - `-o` or `--output`: the file to save normalization results @@ -90,17 +93,20 @@ Use `argnorm -h` or `argnorm --help` to see available options. ```bash >argnorm -h -usage: argnorm [-h] [--db {sarg,ncbi,resfinder,deeparg,megares,argannot}] [--hamronized] [-i INPUT] [-o OUTPUT] {argsoap,abricate,deeparg,resfinder,amrfinderplus} +usage: argnorm [-h] + [--db {sarg,ncbi,resfinder,deeparg,megares,argannot,resfinderfg,groot-argannot,groot-resfinder,groot-db,groot-core-db,groot-card}] + [--hamronized] [-i INPUT] [-o OUTPUT] + {argsoap,abricate,deeparg,resfinder,amrfinderplus,groot} argNorm normalizes ARG annotation results from different tools and databases to the same ontology, namely ARO (Antibiotic Resistance Ontology). positional arguments: - {argsoap,abricate,deeparg,resfinder,amrfinderplus} + {argsoap,abricate,deeparg,resfinder,amrfinderplus,groot} The tool you used to do ARG annotation. -options: +optional arguments: -h, --help show this help message and exit - --db {sarg,ncbi,resfinder,deeparg,megares,argannot} + --db {sarg,ncbi,resfinder,deeparg,megares,argannot,resfinderfg,groot-argannot,groot-resfinder,groot-db,groot-core-db,groot-card} The database you used to do ARG annotation. --hamronized Use this if the input is hamronized (processed using the hAMRonization tool) -i INPUT, --input INPUT diff --git a/tests/test_lib.py b/tests/test_lib.py index 3690f8c..d610dbe 100644 --- a/tests/test_lib.py +++ b/tests/test_lib.py @@ -11,9 +11,9 @@ def test_map_to_aro(): ["(Phe)cpt_strepv:U09991:AAB36569:1412-1948:537", "argannot"], ["MEG_4060|Metals|Multi-metal_resistance|Multi-metal_resistance_protein|MREA", "megares"], ["gi:447201629:ref:WP_001278885.1:|FEATURES|cob(I)alamin_adenolsyltransferase|unclassified|cob(I)alamin_adenolsyltransferase", "deeparg"], - ["argannot~~~(Bla)cfxA4~~~AY769933:1-966", 'groot', 'groot-argannot'], - ["ErmF.3000498.M17124.1181-1982.593", 'groot', 'groot-card'], - ["groot-db_RESFINDER__tet(W)_1_DQ060146", 'groot', 'groot-db'] + ["argannot~~~(Bla)cfxA4~~~AY769933:1-966", 'groot-argannot'], + ["ErmF.3000498.M17124.1181-1982.593", 'groot-card'], + ["groot-db_RESFINDER__tet(W)_1_DQ060146", 'groot-db'] ] ARO = lib.get_aro_ontology() @@ -36,7 +36,7 @@ def test_map_to_aro(): else: assert map_to_aro(t[0], t[1]) == e -@pytest.mark.parametrize('database', ['argannot', 'megares', 'ncbi', 'resfinder', 'resfinderfg', 'groot']) +@pytest.mark.parametrize('database', ['argannot', 'megares', 'ncbi', 'resfinder', 'resfinderfg', 'groot', 'groot-argannot']) def test_get_aro_mapping_table_smoke(database): df = get_aro_mapping_table(database) assert len(df) > 0