Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFCT remove groot_ref_db parameter in map_to_aro #63

Merged
merged 1 commit into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 12 additions & 20 deletions argnorm/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@
'resfinder',
'resfinderfg',
'sarg',
'groot',
]

groot_ref_databases = [
'groot-db',
'groot-core-db',
'groot-argannot',
Expand Down Expand Up @@ -79,14 +75,13 @@ def get_aro_mapping_table(database):
aro_mapping_table['ARO'] = aro_mapping_table['ARO'].map(lambda a: f'ARO:{a}', na_action='ignore')
return aro_mapping_table

def map_to_aro(gene, database, groot_ref_db=None):
def map_to_aro(gene, database):
"""
Description: Gets ARO mapping for a specific gene in a database.

Parameters:
gene (str): The original ID of the gene as mentioned in source database.
database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinderfg, sarg, and groot
groot_ref_db (str, optional): name of reference db used by groot. Can be groot-argannot, groot-resfinder, groot-card, groot-core-db, or groot-db
database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinder, resfinderfg, sarg, groot-db, groot-core-db, groot-argannot, groot-resfinder, groot-card

Returns:
ARO[result] (pronto.term.Term): A pronto term with the ARO number of input gene. ARO number can be accessed using 'id' attribute and gene name can be accessed using 'name' attribute.
Expand All @@ -96,23 +91,20 @@ def map_to_aro(gene, database, groot_ref_db=None):

if database not in DATABASES:
raise Exception(f'{database} is not a supported database.')
if 'groot' in database and not groot_ref_db in groot_ref_databases:
raise Exception(f'{groot_ref_db} is not a valid groot reference database')

mapping_table = get_aro_mapping_table(database)

# Preprocess input gene & mapping table original ids if groot is being used
if 'groot' in database:
if groot_ref_db == 'groot-argannot':
gene = gene.split('~~~')[-1]
mapping_table.index = mapping_table.index.map(lambda x: ':'.join(str(x).split(':')[1:3]))
if groot_ref_db == 'groot-card':
gene = gene.split('.')[0]
if groot_ref_db in ['groot-db', 'groot-core-db']:
if 'card' in gene.lower():
gene = gene.split('|')[-1]
else:
gene = gene.split('__')[1]
if database == 'groot-argannot':
gene = gene.split('~~~')[-1]
mapping_table.index = mapping_table.index.map(lambda x: ':'.join(str(x).split(':')[1:3]))
if database == 'groot-card':
gene = gene.split('.')[0]
if database in ['groot-db', 'groot-core-db']:
if 'card' in gene.lower():
gene = gene.split('|')[-1]
else:
gene = gene.split('__')[1]

try:
result = mapping_table.loc[gene, 'ARO']
Expand Down
9 changes: 4 additions & 5 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ A list of supported databases.

#### Parameters
* gene (str): The original ID of the gene as mentioned in source database.
* database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinderfg and sarg
* groot_ref_db (str, optional): name of reference database used by groot. Can be: groot-argannot, groot-resfinder, groot-card, groot-db, or groot-core-db
* database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinder, resfinderfg, sarg, groot-db, groot-core-db, groot-argannot, groot-resfinder, and groot-card

#### Returns
* pronto.term.Term: A pronto term with the ARO number of input gene. ARO number can be accessed using 'id' attribute and gene name can be accessed using 'name' attribute.
Expand All @@ -26,14 +25,14 @@ from argnorm.lib import map_to_aro
# Mapping the `ARR-2_1_HQ141279` gene from the `resfinder` database to the ARO
print(map_to_aro('ARR-2_1_HQ141279', 'resfinder'))

# Mapping the `argannot~~~(Bla)cfxA4~~~AY769933:1-966` gene in `groot` using the `groot-argannot` reference database
print(map_to_aro('argannot~~~(Bla)cfxA4~~~AY769933:1-966', 'groot', 'groot-argannot'))
# Mapping the `argannot~~~(Bla)cfxA4~~~AY769933:1-966` gene from the `groot-argannot` database to the ARO
print(map_to_aro('argannot~~~(Bla)cfxA4~~~AY769933:1-966', 'groot-argannot'))
```

### argnorm.lib.get_aro_mapping_table(): gets ARO mapping table for a specific database

#### Parameters
* database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinderfg, sarg or groot
* database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinder, resfinderfg, sarg, groot-db, groot-core-db, groot-argannot, groot-resfinder, or groot-card

#### Returns
* pandas.DataFrame: A pandas dataframe with ARGs mapped to AROs.
Expand Down
16 changes: 11 additions & 5 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[![Python package](https://github.com/BigDataBiology/argNorm/actions/workflows/python-package.yml/badge.svg)](https://github.com/BigDataBiology/argNorm/actions/workflows/python-package.yml)
[![Downloads](https://pepy.tech/badge/argNorm)](https://pepy.tech/project/argNorm)
![](https://img.shields.io/badge/status-alpha-red?style=flat)
![](https://img.shields.io/badge/status-beta-yellow?style=flat)
[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/argnorm/README.html)
[![Anaconda-Server Badge](https://anaconda.org/bioconda/argnorm/badges/platforms.svg)](https://anaconda.org/bioconda/argnorm)
[![Anaconda-Server Badge](https://anaconda.org/bioconda/argnorm/badges/license.svg)](https://anaconda.org/bioconda/argnorm)
Expand Down Expand Up @@ -43,6 +43,7 @@ The `resistance_to_drug_classes` column will contain ARO numbers of the broader
- [ABRicate](https://github.com/tseemann/abricate) (v1.0.1) with NCBI (v3.6), ResFinder (v4.1.11), MEGARes (v2.0), ARG-ANNOT (v5), ResFinderFG (v2)
- [ResFinder](https://bitbucket.org/genomicepidemiology/resfinder/src/master/) (v4.0)
- [AMRFinderPlus](https://github.com/ncbi/amr) (v3.10.30)
- [GROOT](https://github.com/will-rowe/groot) (v1.1.2)

## Installation
argNorm can be installed using pip:
Expand Down Expand Up @@ -72,6 +73,7 @@ The only positional argument required is `tool` which can be:
- `abricate`
- `resfinder`
- `amrfinderplus`
- `groot`

The available options are:
- `-h` or `--help`: shows available options and exits.
Expand All @@ -82,6 +84,7 @@ The available options are:
- DeepARG (`deeparg`)
- MEGARes (`megares`)
- ARG-ANNOT (`argannot`)
- GROOT (`groot-core-db`, `groot-db`, `groot-resfinder`, `groot-argannot`, `groot-card`)
- `--hamronized`: use this if the input is hamronized by [hAMRonization](https://github.com/pha4ge/hAMRonization)
- `-i` or `--input`: path to the annotation result
- `-o` or `--output`: the file to save normalization results
Expand All @@ -90,17 +93,20 @@ Use `argnorm -h` or `argnorm --help` to see available options.

```bash
>argnorm -h
usage: argnorm [-h] [--db {sarg,ncbi,resfinder,deeparg,megares,argannot}] [--hamronized] [-i INPUT] [-o OUTPUT] {argsoap,abricate,deeparg,resfinder,amrfinderplus}
usage: argnorm [-h]
[--db {sarg,ncbi,resfinder,deeparg,megares,argannot,resfinderfg,groot-argannot,groot-resfinder,groot-db,groot-core-db,groot-card}]
[--hamronized] [-i INPUT] [-o OUTPUT]
{argsoap,abricate,deeparg,resfinder,amrfinderplus,groot}

argNorm normalizes ARG annotation results from different tools and databases to the same ontology, namely ARO (Antibiotic Resistance Ontology).

positional arguments:
{argsoap,abricate,deeparg,resfinder,amrfinderplus}
{argsoap,abricate,deeparg,resfinder,amrfinderplus,groot}
The tool you used to do ARG annotation.

options:
optional arguments:
-h, --help show this help message and exit
--db {sarg,ncbi,resfinder,deeparg,megares,argannot}
--db {sarg,ncbi,resfinder,deeparg,megares,argannot,resfinderfg,groot-argannot,groot-resfinder,groot-db,groot-core-db,groot-card}
The database you used to do ARG annotation.
--hamronized Use this if the input is hamronized (processed using the hAMRonization tool)
-i INPUT, --input INPUT
Expand Down
8 changes: 4 additions & 4 deletions tests/test_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ def test_map_to_aro():
["(Phe)cpt_strepv:U09991:AAB36569:1412-1948:537", "argannot"],
["MEG_4060|Metals|Multi-metal_resistance|Multi-metal_resistance_protein|MREA", "megares"],
["gi:447201629:ref:WP_001278885.1:|FEATURES|cob(I)alamin_adenolsyltransferase|unclassified|cob(I)alamin_adenolsyltransferase", "deeparg"],
["argannot~~~(Bla)cfxA4~~~AY769933:1-966", 'groot', 'groot-argannot'],
["ErmF.3000498.M17124.1181-1982.593", 'groot', 'groot-card'],
["groot-db_RESFINDER__tet(W)_1_DQ060146", 'groot', 'groot-db']
["argannot~~~(Bla)cfxA4~~~AY769933:1-966", 'groot-argannot'],
["ErmF.3000498.M17124.1181-1982.593", 'groot-card'],
["groot-db_RESFINDER__tet(W)_1_DQ060146", 'groot-db']
]

ARO = lib.get_aro_ontology()
Expand All @@ -36,7 +36,7 @@ def test_map_to_aro():
else:
assert map_to_aro(t[0], t[1]) == e

@pytest.mark.parametrize('database', ['argannot', 'megares', 'ncbi', 'resfinder', 'resfinderfg', 'groot'])
@pytest.mark.parametrize('database', ['argannot', 'megares', 'ncbi', 'resfinder', 'resfinderfg', 'groot', 'groot-argannot'])
def test_get_aro_mapping_table_smoke(database):
df = get_aro_mapping_table(database)
assert len(df) > 0
Expand Down
Loading