From ceae8c55fed590c9f5ff9b7261aa024e47e73a91 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Wed, 27 Dec 2023 12:41:00 -0800 Subject: [PATCH] Adjust input length on nextclade runs Only run nextclade on sequences at least 1400 nt long, the approximate length of the dengue E gene. This avoids misclassification of short sequences. --- ingest/workflow/snakemake_rules/nextclade.smk | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ingest/workflow/snakemake_rules/nextclade.smk b/ingest/workflow/snakemake_rules/nextclade.smk index a7a73e46..2e632e29 100644 --- a/ingest/workflow/snakemake_rules/nextclade.smk +++ b/ingest/workflow/snakemake_rules/nextclade.smk @@ -8,6 +8,8 @@ rule nextclade_all: output: "data/nextclade_results/nextclade_all.tsv", threads: 4 + params: + min_length=1400, # approximately E gene length shell: """ nextclade run \ @@ -15,6 +17,7 @@ rule nextclade_all: -j {threads} \ --output-tsv {output} \ --min-match-rate 0.01 \ + --min-length {params.min_length} \ --silent \ {input.sequences} """