nextstrain · j23414 · May 13, 2024 · May 10, 2024 · May 10, 2024 · May 10, 2024
diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile
@@ -9,7 +9,7 @@ wildcard_constraints:
 
 rule all:
     input:
-        auspice_json = expand("auspice/dengue_{serotype}_{gene}.json", serotype=serotypes, gene='genome'),
+        auspice_json = expand("auspice/dengue_{serotype}_{gene}.json", serotype=serotypes, gene=genes),
 
 include: "rules/prepare_sequences.smk"
 include: "rules/prepare_sequences_E.smk"

diff --git a/phylogenetic/config/color_orderings.tsv b/phylogenetic/config/color_orderings.tsv
@@ -235,15 +235,20 @@ recency	New
 
 ################
 
+ncbi_serotype	denv1
+ncbi_serotype	denv2
+ncbi_serotype	denv3
+ncbi_serotype	denv4
+
 nextclade_subtype	DENV1/I
 nextclade_subtype	DENV1/II
 nextclade_subtype	DENV1/III
 nextclade_subtype	DENV1/IV
 nextclade_subtype	DENV1/V
-nextclade_subtype	DENV2/AM
 nextclade_subtype	DENV2/AA
 nextclade_subtype	DENV2/AI
 nextclade_subtype	DENV2/AII
+nextclade_subtype	DENV2/AM
 nextclade_subtype	DENV2/C
 nextclade_subtype	DENV2/S
 nextclade_subtype	DENV3/I

diff --git a/phylogenetic/config/config_dengue.yaml b/phylogenetic/config/config_dengue.yaml
@@ -4,7 +4,9 @@ display_strain_field: "strain"
 filter:
   exclude: "config/exclude.txt"
   group_by: "year region"
-  min_length: 5000
+  min_length:
+    genome: 5000
+    E: 1000
   sequences_per_group:
     all: '10'
     denv1: '36'
@@ -15,11 +17,11 @@ filter:
 traits:
   sampling_bias_correction: '3'
   traits_columns:
-    all: 'region nextclade_subtype'
-    denv1: 'country region nextclade_subtype'
-    denv2: 'country region nextclade_subtype'
-    denv3: 'country region nextclade_subtype'
-    denv4: 'country region nextclade_subtype'
+    all: 'region ncbi_serotype nextclade_subtype'
+    denv1: 'country region ncbi_serotype nextclade_subtype'
+    denv2: 'country region ncbi_serotype nextclade_subtype'
+    denv3: 'country region ncbi_serotype nextclade_subtype'
+    denv4: 'country region ncbi_serotype nextclade_subtype'
 
 clades:
   clade_definitions:

diff --git a/phylogenetic/rules/annotate_phylogeny.smk b/phylogenetic/rules/annotate_phylogeny.smk
@@ -44,7 +44,7 @@ rule translate:
     input:
         tree = "results/{gene}/tree_{serotype}.nwk",
         node_data = "results/{gene}/nt-muts_{serotype}.json",
-        reference = "config/reference_{serotype}_genome.gb"
+        reference = lambda wildcards: "config/reference_{serotype}_genome.gb" if wildcards.gene == 'genome' else "results/config/reference_{serotype}_{gene}.gb"  # genome: config/ reference; other genes: per-gene reference under results/config/
     output:
         node_data = "results/{gene}/aa-muts_{serotype}.json"
     shell:
@@ -85,12 +85,12 @@ rule traits:
 rule clades:
     """Annotating serotypes / genotypes"""
     input:
-        tree = "results/{gene}/tree_{serotype}.nwk",
-        nt_muts = "results/{gene}/nt-muts_{serotype}.json",
-        aa_muts = "results/{gene}/aa-muts_{serotype}.json",
+        tree = "results/genome/tree_{serotype}.nwk",
+        nt_muts = "results/genome/nt-muts_{serotype}.json",
+        aa_muts = "results/genome/aa-muts_{serotype}.json",
         clade_defs = lambda wildcards: config['clades']['clade_definitions'][wildcards.serotype],
     output:
-        clades = "results/{gene}/clades_{serotype}.json"
+        clades = "results/genome/clades_{serotype}.json"
     shell:
         """
         augur clades \

diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk
@@ -42,7 +42,7 @@ rule prepare_auspice_config:
     output:
         auspice_config="results/config/{gene}/auspice_config_{serotype}.json",
     params:
-        replace_clade_key="clade_membership",
+        replace_clade_key=lambda wildcard: r"clade_membership" if wildcard.gene in ['genome'] else r"nextclade_subtype",
         replace_clade_title=lambda wildcard: r"Serotype" if wildcard.serotype in ['all'] else r"DENV genotype",
     run:
         data = {
@@ -81,6 +81,11 @@ rule prepare_auspice_config:
                 "key": "nextclade_subtype",
                 "title": "Nextclade genotype",
                 "type": "categorical"
+              },
+              {
+                "key": "ncbi_serotype",
+                "title": "NCBI serotype",
+                "type": "categorical"
               }
             ],
             "geo_resolutions": [
@@ -89,8 +94,7 @@ rule prepare_auspice_config:
             ],
             "display_defaults": {
               "map_triplicate": True,
-              "color_by": params.replace_clade_key,
-              "distance_measure": "div"
+              "color_by": params.replace_clade_key
             },
             "filters": [
               "country",
@@ -113,7 +117,7 @@ rule export:
         metadata = "data/metadata_{serotype}.tsv",
         branch_lengths = "results/{gene}/branch-lengths_{serotype}.json",
         traits = "results/{gene}/traits_{serotype}.json",
-        clades = "results/{gene}/clades_{serotype}.json",
+        clades = lambda wildcard: "results/{gene}/clades_{serotype}.json" if wildcard.gene in ['genome'] else [],
         nt_muts = "results/{gene}/nt-muts_{serotype}.json",
         aa_muts = "results/{gene}/aa-muts_{serotype}.json",
         auspice_config = "results/config/{gene}/auspice_config_{serotype}.json",

diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk
@@ -52,15 +52,15 @@ rule filter:
       - excluding strains with missing region, country or date metadata
     """
     input:
-        sequences = "data/sequences_{serotype}.fasta",
+        sequences = lambda wildcards: "data/sequences_{serotype}.fasta" if wildcards.gene == 'genome' else "results/{gene}/sequences_{serotype}.fasta",  # genome: data/ sequences; other genes: per-gene sequences under results/
         metadata = "data/metadata_{serotype}.tsv",
         exclude = config["filter"]["exclude"],
     output:
         sequences = "results/{gene}/filtered_{serotype}.fasta"
     params:
         group_by = config['filter']['group_by'],
         sequences_per_group = lambda wildcards: config['filter']['sequences_per_group'][wildcards.serotype],
-        min_length = config['filter']['min_length'],
+        min_length = lambda wildcards: config['filter']['min_length'][wildcards.gene],  # min_length is looked up per gene in the config
         strain_id = config.get("strain_id_field", "strain"),
     shell:
         """
@@ -83,7 +83,7 @@ rule align:
     """
     input:
         sequences = "results/{gene}/filtered_{serotype}.fasta",
-        reference = "config/reference_{serotype}_genome.gb"
+        reference = lambda wildcards: "config/reference_{serotype}_genome.gb" if wildcards.gene == 'genome' else "results/config/reference_{serotype}_{gene}.gb"  # genome: config/ reference; other genes: per-gene reference under results/config/
     output:
         alignment = "results/{gene}/aligned_{serotype}.fasta"
     shell: