Skip to content

Commit

Permalink
get-gtdb-data: 214 release is new default (#157)
Browse files Browse the repository at this point in the history
Thank you for doing this @nbokulich! I was unaware of the new GTDB release. I ran all the code manually as well as the test code. It all works beautifully. :-)
  • Loading branch information
nbokulich authored Apr 28, 2023
1 parent 93f9ab0 commit ed0344d
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 5 deletions.
5 changes: 3 additions & 2 deletions rescript/get_gtdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@
# bacteria. for example 'ar53' and 'bac120' mean that the GTDB phylogeny
# is based on 53 and 120 concatenated proteins (cp), respectively.
# If this changes we can set up a conditional statement below.
VERSION_MAP_DICT = {'207': {'Archaea': 'ar53', 'Bacteria': 'bac120'},
VERSION_MAP_DICT = {'214': {'Archaea': 'ar53', 'Bacteria': 'bac120'},
'207': {'Archaea': 'ar53', 'Bacteria': 'bac120'},
'202': {'Archaea': 'ar122', 'Bacteria': 'bac120'}}


def get_gtdb_data(ctx, version='207', domain='Both'):
def get_gtdb_data(ctx, version='214', domain='Both'):

ver_dom_dict = defaultdict(lambda: defaultdict(dict))

Expand Down
2 changes: 1 addition & 1 deletion rescript/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -904,7 +904,7 @@
function=get_gtdb_data,
inputs={},
parameters={
'version': Str % Choices(['202', '207']),
'version': Str % Choices(['202', '207', '214']),
'domain': Str % Choices(['Both', 'Bacteria', 'Archaea']),
},
outputs=[('gtdb_taxonomy', FeatureData[Taxonomy]),
Expand Down
14 changes: 12 additions & 2 deletions rescript/tests/test_get_gtdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ def test_assemble_queries(self):
obs_seq_urls = [q_info[1] for q_info in queries['Sequence']]

exp_tax_urls = [('https://data.gtdb.ecogenomic.org/releases/'
'release214/214.0/ar53_taxonomy_r214.tsv.gz'),
('https://data.gtdb.ecogenomic.org/releases/'
'release214/214.0/bac120_taxonomy_r214.tsv.gz'),
('https://data.gtdb.ecogenomic.org/releases/'
'release207/207.0/ar53_taxonomy_r207.tsv.gz'),
('https://data.gtdb.ecogenomic.org/releases/'
'release207/207.0/bac120_taxonomy_r207.tsv.gz'),
Expand All @@ -59,6 +63,12 @@ def test_assemble_queries(self):
('https://data.gtdb.ecogenomic.org/releases'
'/release202/202.0/bac120_taxonomy_r202.tsv.gz')]
exp_seq_urls = [('https://data.gtdb.ecogenomic.org/releases'
'/release214/214.0/genomic_files_reps/'
'ar53_ssu_reps_r214.tar.gz'),
('https://data.gtdb.ecogenomic.org/releases/'
'release214/214.0/genomic_files_reps/'
'bac120_ssu_reps_r214.tar.gz'),
('https://data.gtdb.ecogenomic.org/releases'
'/release207/207.0/genomic_files_reps/'
'ar53_ssu_reps_r207.tar.gz'),
('https://data.gtdb.ecogenomic.org/releases/'
Expand Down Expand Up @@ -91,11 +101,11 @@ def _makey_fakey_arch(faking_ignore_this):
def _makey_fakey_bact(faking_ignore_this):
return [self.bact_tax], [self.bact_seqs]

# default (both domains, version 207)
# default (both domains, version 214)
with patch('rescript.get_gtdb._retrieve_data_from_gtdb',
new=_makey_fakey_both):
res = rescript.actions.get_gtdb_data(
version='207', domain='Both')
version='214', domain='Both')
self.assertEqual(len(res), 2)
self.assertEqual(str(res[0].type), 'FeatureData[Taxonomy]')
self.assertEqual(str(res[1].type), 'FeatureData[Sequence]')
Expand Down

0 comments on commit ed0344d

Please sign in to comment.