From ed0344d29b321c482f1e9970c6dceb85eb9bf05d Mon Sep 17 00:00:00 2001 From: Nicholas Bokulich Date: Fri, 28 Apr 2023 16:52:38 +0200 Subject: [PATCH] get-gtdb-data: 214 release is new default (#157) Thank you for doing this @nbokulich! I was unaware of the new GTDB release. I ran all the code manually as well as the test code. It all works beautifully. :-) --- rescript/get_gtdb.py | 5 +++-- rescript/plugin_setup.py | 2 +- rescript/tests/test_get_gtdb.py | 14 ++++++++++++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/rescript/get_gtdb.py b/rescript/get_gtdb.py index 0dd3588..537de6d 100644 --- a/rescript/get_gtdb.py +++ b/rescript/get_gtdb.py @@ -21,11 +21,12 @@ # bacteria. for example 'ar53' and 'bac120' mean that the GTDB phylogeny # is based on 53 and 120 concatenated proteins (cp), respectively. # If this changes we can set up a conditional statemnt below. -VERSION_MAP_DICT = {'207': {'Archaea': 'ar53', 'Bacteria': 'bac120'}, +VERSION_MAP_DICT = {'214': {'Archaea': 'ar53', 'Bacteria': 'bac120'}, + '207': {'Archaea': 'ar53', 'Bacteria': 'bac120'}, '202': {'Archaea': 'ar122', 'Bacteria': 'bac120'}} -def get_gtdb_data(ctx, version='207', domain='Both'): +def get_gtdb_data(ctx, version='214', domain='Both'): ver_dom_dict = defaultdict(lambda: defaultdict(dict)) diff --git a/rescript/plugin_setup.py b/rescript/plugin_setup.py index 8e98f7c..1d43d4e 100644 --- a/rescript/plugin_setup.py +++ b/rescript/plugin_setup.py @@ -904,7 +904,7 @@ function=get_gtdb_data, inputs={}, parameters={ - 'version': Str % Choices(['202', '207']), + 'version': Str % Choices(['202', '207', '214']), 'domain': Str % Choices(['Both', 'Bacteria', 'Archaea']), }, outputs=[('gtdb_taxonomy', FeatureData[Taxonomy]), diff --git a/rescript/tests/test_get_gtdb.py b/rescript/tests/test_get_gtdb.py index d0a7f4b..6f4bb87 100644 --- a/rescript/tests/test_get_gtdb.py +++ b/rescript/tests/test_get_gtdb.py @@ -51,6 +51,10 @@ def test_assemble_queries(self): obs_seq_urls = [q_info[1] for 
q_info in queries['Sequence']] exp_tax_urls = [('https://data.gtdb.ecogenomic.org/releases/' + 'release214/214.0/ar53_taxonomy_r214.tsv.gz'), + ('https://data.gtdb.ecogenomic.org/releases/' + 'release214/214.0/bac120_taxonomy_r214.tsv.gz'), + ('https://data.gtdb.ecogenomic.org/releases/' 'release207/207.0/ar53_taxonomy_r207.tsv.gz'), ('https://data.gtdb.ecogenomic.org/releases/' 'release207/207.0/bac120_taxonomy_r207.tsv.gz'), @@ -59,6 +63,12 @@ def test_assemble_queries(self): ('https://data.gtdb.ecogenomic.org/releases' '/release202/202.0/bac120_taxonomy_r202.tsv.gz')] exp_seq_urls = [('https://data.gtdb.ecogenomic.org/releases' + '/release214/214.0/genomic_files_reps/' + 'ar53_ssu_reps_r214.tar.gz'), + ('https://data.gtdb.ecogenomic.org/releases/' + 'release214/214.0/genomic_files_reps/' + 'bac120_ssu_reps_r214.tar.gz'), + ('https://data.gtdb.ecogenomic.org/releases' '/release207/207.0/genomic_files_reps/' 'ar53_ssu_reps_r207.tar.gz'), ('https://data.gtdb.ecogenomic.org/releases/' @@ -91,11 +101,11 @@ def _makey_fakey_arch(faking_ignore_this): def _makey_fakey_bact(faking_ignore_this): return [self.bact_tax], [self.bact_seqs] - # default (both domains, version 207) + # default (both domains, version 214) with patch('rescript.get_gtdb._retrieve_data_from_gtdb', new=_makey_fakey_both): res = rescript.actions.get_gtdb_data( - version='207', domain='Both') + version='214', domain='Both') self.assertEqual(len(res), 2) self.assertEqual(str(res[0].type), 'FeatureData[Taxonomy]') self.assertEqual(str(res[1].type), 'FeatureData[Sequence]')