From 46e7589e8ecc390ebdd925ef906e29bcb79133f8 Mon Sep 17 00:00:00 2001
From: Vedanth <vedanth.ramji@outlook.com>
Date: Tue, 13 Aug 2024 10:57:06 +0530
Subject: [PATCH] RFCT remove groot_ref_db parameter in map_to_aro

map_to_aro() previously took an extra parameter, groot_ref_db, so that the reference database used with groot could be taken into account when looking up a gene in the groot ARO mapping. The groot reference database (e.g. 'groot-card') is now passed directly as the 'database' argument, which simplifies the API.
---
 argnorm/lib.py    | 32 ++++++++++++--------------------
 docs/api.md       |  9 ++++-----
 docs/index.md     | 16 +++++++++++-----
 tests/test_lib.py |  8 ++++----
 4 files changed, 31 insertions(+), 34 deletions(-)

diff --git a/argnorm/lib.py b/argnorm/lib.py
index 5fa0812..704e6ae 100644
--- a/argnorm/lib.py
+++ b/argnorm/lib.py
@@ -20,10 +20,6 @@
     'resfinder', 
     'resfinderfg', 
     'sarg',
-    'groot',
-]
-
-groot_ref_databases = [
     'groot-db',
     'groot-core-db',
     'groot-argannot',
@@ -79,14 +75,13 @@ def get_aro_mapping_table(database):
     aro_mapping_table['ARO'] = aro_mapping_table['ARO'].map(lambda a: f'ARO:{a}', na_action='ignore')
     return aro_mapping_table
 
-def map_to_aro(gene, database, groot_ref_db=None):
+def map_to_aro(gene, database):
     """
     Description: Gets ARO mapping for a specific gene in a database.
 
     Parameters:
         gene (str): The original ID of the gene as mentioned in source database.
-        database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinderfg, sarg, and groot
-        groot_ref_db (str, optional): name of reference db used by groot. Can be groot-argannot, groot-resfinder, groot-card, groot-core-db, or groot-db
+        database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinder, resfinderfg, sarg, groot-db, groot-core-db, groot-argannot, groot-resfinder, or groot-card
 
     Returns:
         ARO[result] (pronto.term.Term): A pronto term with the ARO number of input gene. ARO number can be accessed using 'id' attribute and gene name can be accessed using 'name' attribute.
@@ -96,23 +91,20 @@ def map_to_aro(gene, database, groot_ref_db=None):
 
     if database not in DATABASES:
         raise Exception(f'{database} is not a supported database.')
-    if 'groot' in database and not groot_ref_db in groot_ref_databases:
-        raise Exception(f'{groot_ref_db} is not a valid groot reference database')
 
     mapping_table = get_aro_mapping_table(database)
     
     # Preprocess input gene & mapping table original ids if groot is being used
-    if 'groot' in database:
-        if groot_ref_db == 'groot-argannot':
-            gene = gene.split('~~~')[-1]
-            mapping_table.index = mapping_table.index.map(lambda x: ':'.join(str(x).split(':')[1:3]))
-        if groot_ref_db == 'groot-card':
-            gene = gene.split('.')[0]
-        if groot_ref_db in ['groot-db', 'groot-core-db']:
-            if 'card' in gene.lower():
-                gene = gene.split('|')[-1]
-            else:
-                gene = gene.split('__')[1]
+    if database == 'groot-argannot':
+        gene = gene.split('~~~')[-1]
+        mapping_table.index = mapping_table.index.map(lambda x: ':'.join(str(x).split(':')[1:3]))
+    if database == 'groot-card':
+        gene = gene.split('.')[0]
+    if database in ['groot-db', 'groot-core-db']:
+        if 'card' in gene.lower():
+            gene = gene.split('|')[-1]
+        else:
+            gene = gene.split('__')[1]
 
     try:
         result = mapping_table.loc[gene, 'ARO']
diff --git a/docs/api.md b/docs/api.md
index ef0533d..f0d23cc 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -10,8 +10,7 @@ A list of supported databases.
 
 #### Parameters
 * gene (str): The original ID of the gene as mentioned in source database.
-* database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinderfg and sarg
-* groot_ref_db (str, optional): name of reference database used by groot. Can be: groot-argannot, groot-resfinder, groot-card, groot-db, or groot-core-db
+* database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinder, resfinderfg, sarg, groot-db, groot-core-db, groot-argannot, groot-resfinder, or groot-card
 
 #### Returns
 * pronto.term.Term: A pronto term with the ARO number of input gene. ARO number can be accessed using 'id' attribute and gene name can be accessed using 'name' attribute. 
@@ -26,14 +25,14 @@ from argnorm.lib import map_to_aro
 # Mapping the `ARR-2_1_HQ141279` gene from the `resfinder` database to the ARO
 print(map_to_aro('ARR-2_1_HQ141279', 'resfinder'))
 
-# Mapping the `argannot~~~(Bla)cfxA4~~~AY769933:1-966` gene in `groot` using the `groot-argannot` reference database
-print(map_to_aro('argannot~~~(Bla)cfxA4~~~AY769933:1-966', 'groot', 'groot-argannot'))
+# Mapping the `argannot~~~(Bla)cfxA4~~~AY769933:1-966` gene in `groot` using the `groot-argannot` database
+print(map_to_aro('argannot~~~(Bla)cfxA4~~~AY769933:1-966', 'groot-argannot'))
 ```
 
 ### argnorm.lib.get_aro_mapping_table(): gets ARO mapping table for a specific database
 
 #### Parameters 
-* database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinderfg, sarg or groot
+* database (str): name of database. Can be: argannot, deeparg, megares, ncbi, resfinder, resfinderfg, sarg, groot-db, groot-core-db, groot-argannot, groot-resfinder, or groot-card
 
 #### Returns
 * pandas.DataFrame: A pandas dataframe with ARGs mapped to AROs.
diff --git a/docs/index.md b/docs/index.md
index 88f38d7..7058399 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -2,7 +2,7 @@
 
 [![Python package](https://github.com/BigDataBiology/argNorm/actions/workflows/python-package.yml/badge.svg)](https://github.com/BigDataBiology/argNorm/actions/workflows/python-package.yml)
 [![Downloads](https://pepy.tech/badge/argNorm)](https://pepy.tech/project/argNorm)
-![](https://img.shields.io/badge/status-alpha-red?style=flat)
+![](https://img.shields.io/badge/status-beta-yellow?style=flat)
 [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/argnorm/README.html)
 [![Anaconda-Server Badge](https://anaconda.org/bioconda/argnorm/badges/platforms.svg)](https://anaconda.org/bioconda/argnorm)
 [![Anaconda-Server Badge](https://anaconda.org/bioconda/argnorm/badges/license.svg)](https://anaconda.org/bioconda/argnorm)
@@ -43,6 +43,7 @@ The `resistance_to_drug_classes` column will contain ARO numbers of the broader
 - [ABRicate](https://github.com/tseemann/abricate) (v1.0.1) with NCBI (v3.6), ResFinder (v4.1.11), MEGARes (v2.0), ARG-ANNOT (v5), ResFinderFG (v2)
 - [ResFinder](https://bitbucket.org/genomicepidemiology/resfinder/src/master/) (v4.0)
 - [AMRFinderPlus](https://github.com/ncbi/amr) (v3.10.30)
+- [GROOT](https://github.com/will-rowe/groot) (v1.1.2)
 
 ## Installation
 argNorm can be installed using pip:
@@ -72,6 +73,7 @@ The only positional argument required is `tool` which can be:
 - `abricate`
 - `resfinder`
 - `amrfinderplus`
+- `groot`
 
 The available options are:
 - `-h` or `--help`: shows available options and exits.
@@ -82,6 +84,7 @@ The available options are:
     - DeepARG (`deeparg`)
     - MEGARes (`megares`)
     - ARG-ANNOT (`argannot`)
+    - `groot-core-db`, `groot-db`, `groot-resfinder`, `groot-argannot`, `groot-card`
 - `--hamronized`: use this if the input is hamronized by [hAMRonization](https://github.com/pha4ge/hAMRonization)
 - `-i` or `--input`: path to the annotation result
 - `-o` or `--output`: the file to save normalization results
@@ -90,17 +93,20 @@ Use `argnorm -h` or `argnorm --help` to see available options.
 
 ```bash
 >argnorm -h
-usage: argnorm [-h] [--db {sarg,ncbi,resfinder,deeparg,megares,argannot}] [--hamronized] [-i INPUT] [-o OUTPUT] {argsoap,abricate,deeparg,resfinder,amrfinderplus}
+usage: argnorm [-h]
+               [--db {sarg,ncbi,resfinder,deeparg,megares,argannot,resfinderfg,groot-argannot,groot-resfinder,groot-db,groot-core-db,groot-card}]
+               [--hamronized] [-i INPUT] [-o OUTPUT]
+               {argsoap,abricate,deeparg,resfinder,amrfinderplus,groot}
 
 argNorm normalizes ARG annotation results from different tools and databases to the same ontology, namely ARO (Antibiotic Resistance Ontology).
 
 positional arguments:
-  {argsoap,abricate,deeparg,resfinder,amrfinderplus}
+  {argsoap,abricate,deeparg,resfinder,amrfinderplus,groot}
                         The tool you used to do ARG annotation.
 
-options:
+optional arguments:
   -h, --help            show this help message and exit
-  --db {sarg,ncbi,resfinder,deeparg,megares,argannot}
+  --db {sarg,ncbi,resfinder,deeparg,megares,argannot,resfinderfg,groot-argannot,groot-resfinder,groot-db,groot-core-db,groot-card}
                         The database you used to do ARG annotation.
   --hamronized          Use this if the input is hamronized (processed using the hAMRonization tool)
   -i INPUT, --input INPUT
diff --git a/tests/test_lib.py b/tests/test_lib.py
index 3690f8c..d610dbe 100644
--- a/tests/test_lib.py
+++ b/tests/test_lib.py
@@ -11,9 +11,9 @@ def test_map_to_aro():
         ["(Phe)cpt_strepv:U09991:AAB36569:1412-1948:537", "argannot"],
         ["MEG_4060|Metals|Multi-metal_resistance|Multi-metal_resistance_protein|MREA", "megares"],
         ["gi:447201629:ref:WP_001278885.1:|FEATURES|cob(I)alamin_adenolsyltransferase|unclassified|cob(I)alamin_adenolsyltransferase", "deeparg"],
-        ["argannot~~~(Bla)cfxA4~~~AY769933:1-966", 'groot', 'groot-argannot'],
-        ["ErmF.3000498.M17124.1181-1982.593", 'groot', 'groot-card'],
-        ["groot-db_RESFINDER__tet(W)_1_DQ060146", 'groot', 'groot-db']
+        ["argannot~~~(Bla)cfxA4~~~AY769933:1-966", 'groot-argannot'],
+        ["ErmF.3000498.M17124.1181-1982.593", 'groot-card'],
+        ["groot-db_RESFINDER__tet(W)_1_DQ060146", 'groot-db']
     ]
 
     ARO = lib.get_aro_ontology()
@@ -36,7 +36,7 @@ def test_map_to_aro():
         else:
             assert map_to_aro(t[0], t[1]) == e
 
-@pytest.mark.parametrize('database', ['argannot', 'megares', 'ncbi', 'resfinder', 'resfinderfg', 'groot'])
+@pytest.mark.parametrize('database', ['argannot', 'megares', 'ncbi', 'resfinder', 'resfinderfg', 'groot', 'groot-argannot'])
 def test_get_aro_mapping_table_smoke(database):
     df = get_aro_mapping_table(database)
     assert len(df) > 0