Skip to content

Commit

Permalink
MRG: add utilities for using ictv taxonomic ranks with sourmash tax
Browse files Browse the repository at this point in the history
  • Loading branch information
bluegenes authored Feb 10, 2024
1 parent a82ced1 commit aef3c61
Show file tree
Hide file tree
Showing 10 changed files with 553 additions and 29 deletions.
4 changes: 2 additions & 2 deletions doc/command-line.md
Original file line number Diff line number Diff line change
Expand Up @@ -551,8 +551,8 @@ The sourmash `tax` or `taxonomy` commands integrate taxonomic
`gather` command (we cannot combine separate `gather` runs for the
same query). For supported databases (e.g. GTDB, NCBI), we provide
taxonomy csv files, but they can also be generated for user-generated
databases. As of v4.8, some sourmash taxonomy commands can also use `LIN`
lineage information. For more information, see [databases](databases.md).
databases. As of v4.8 and v4.8.6, some sourmash taxonomy commands can also
use `LIN` and `ICTV` lineage information, respectively.

`tax` commands rely upon the fact that `gather` provides both the total
fraction of the query matched to each database matched, as well as a
Expand Down
7 changes: 7 additions & 0 deletions src/sourmash/cli/tax/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ def subparser(subparsers):
default=False,
help="use LIN taxonomy in place of standard taxonomic ranks. Note that the taxonomy CSV must contain LIN lineage information.",
)
subparser.add_argument(
"--ictv",
"--ictv-taxonomy",
action="store_true",
default=False,
help="use ICTV taxonomy in place of standard taxonomic ranks. Note that the taxonomy CSV must contain ICTV ranks.",
)


def main(args):
Expand Down
7 changes: 7 additions & 0 deletions src/sourmash/cli/tax/genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,13 @@ def subparser(subparsers):
default=None,
help="CSV containing 'name', 'lin' columns, where 'lin' is the lingroup prefix. Will restrict classification to these groups.",
)
subparser.add_argument(
"--ictv",
"--ictv-taxonomy",
action="store_true",
default=False,
help="use ICTV taxonomy in place of standard taxonomic ranks. Note that the taxonomy CSV must contain ICTV ranks.",
)
add_tax_threshold_arg(subparser, 0.1)
add_rank_arg(subparser)

Expand Down
7 changes: 7 additions & 0 deletions src/sourmash/cli/tax/metagenome.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,13 @@ def subparser(subparsers):
default=None,
help="CSV containing 'name', 'lin' columns, where 'lin' is the lingroup prefix. Will produce a 'lingroup' report containing taxonomic summarization for each group.",
)
subparser.add_argument(
"--ictv",
"--ictv-taxonomy",
action="store_true",
default=False,
help="use ICTV taxonomy in place of standard taxonomic ranks. Note that the taxonomy CSV must contain ICTV ranks.",
)
add_rank_arg(subparser)


Expand Down
7 changes: 7 additions & 0 deletions src/sourmash/cli/tax/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ def subparser(subparsers):
default=False,
help="use LIN taxonomy in place of standard taxonomic ranks.",
)
subparser.add_argument(
"--ictv",
"--ictv-taxonomy",
action="store_true",
default=False,
help="use ICTV taxonomy in place of standard taxonomic ranks. Note that the taxonomy CSV must contain ICTV ranks.",
)


def main(args):
Expand Down
10 changes: 10 additions & 0 deletions src/sourmash/tax/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
RankLineageInfo,
LINLineageInfo,
AnnotateTaxResult,
ICTVRankLineageInfo,
)

usage = """
Expand Down Expand Up @@ -82,6 +83,7 @@ def metagenome(args):
keep_identifier_versions=args.keep_identifier_versions,
force=args.force,
lins=args.lins,
ictv=args.ictv,
)
available_ranks = tax_assign.available_ranks
except ValueError as exc:
Expand Down Expand Up @@ -113,6 +115,7 @@ def metagenome(args):
keep_full_identifiers=args.keep_full_identifiers,
keep_identifier_versions=args.keep_identifier_versions,
lins=args.lins,
ictv=args.ictv,
)
except ValueError as exc:
error(f"ERROR: {str(exc)}")
Expand Down Expand Up @@ -258,6 +261,7 @@ def genome(args):
keep_identifier_versions=args.keep_identifier_versions,
force=args.force,
lins=args.lins,
ictv=args.ictv,
)
available_ranks = tax_assign.available_ranks

Expand Down Expand Up @@ -297,6 +301,7 @@ def genome(args):
keep_full_identifiers=args.keep_full_identifiers,
keep_identifier_versions=args.keep_identifier_versions,
lins=args.lins,
ictv=args.ictv,
)

except ValueError as exc:
Expand Down Expand Up @@ -402,6 +407,7 @@ def annotate(args):
keep_identifier_versions=args.keep_identifier_versions,
force=args.force,
lins=args.lins,
ictv=args.ictv,
)

except ValueError as exc:
Expand Down Expand Up @@ -466,6 +472,7 @@ def annotate(args):
raw=row,
id_col=id_col,
lins=args.lins,
ictv=args.ictv,
keep_full_identifiers=args.keep_full_identifiers,
keep_identifier_versions=args.keep_identifier_versions,
)
Expand Down Expand Up @@ -591,6 +598,7 @@ def summarize(args):
keep_full_identifiers=args.keep_full_identifiers,
keep_identifier_versions=args.keep_identifier_versions,
lins=args.lins,
ictv=args.ictv,
)
except ValueError as exc:
error("ERROR while loading taxonomies!")
Expand Down Expand Up @@ -637,6 +645,8 @@ def summarize(args):
rank = lineage[-1].rank
if args.lins:
inf = LINLineageInfo(lineage=lineage)
elif args.ictv:
inf = ICTVRankLineageInfo(lineage=lineage)
else:
inf = RankLineageInfo(lineage=lineage)
lin = inf.display_lineage()
Expand Down
Loading

0 comments on commit aef3c61

Please sign in to comment.