diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile index c763f009..46457340 100644 --- a/phylogenetic/Snakefile +++ b/phylogenetic/Snakefile @@ -9,7 +9,7 @@ wildcard_constraints: rule all: input: - auspice_json = expand("auspice/dengue_{serotype}_{gene}.json", serotype=serotypes, gene='genome'), + auspice_json = expand("auspice/dengue_{serotype}_{gene}.json", serotype=serotypes, gene=genes), include: "rules/prepare_sequences.smk" include: "rules/prepare_sequences_E.smk" diff --git a/phylogenetic/config/color_orderings.tsv b/phylogenetic/config/color_orderings.tsv index 6569a5cf..e8334ae2 100644 --- a/phylogenetic/config/color_orderings.tsv +++ b/phylogenetic/config/color_orderings.tsv @@ -235,15 +235,20 @@ recency New ################ +ncbi_serotype denv1 +ncbi_serotype denv2 +ncbi_serotype denv3 +ncbi_serotype denv4 + nextclade_subtype DENV1/I nextclade_subtype DENV1/II nextclade_subtype DENV1/III nextclade_subtype DENV1/IV nextclade_subtype DENV1/V -nextclade_subtype DENV2/AM nextclade_subtype DENV2/AA nextclade_subtype DENV2/AI nextclade_subtype DENV2/AII +nextclade_subtype DENV2/AM nextclade_subtype DENV2/C nextclade_subtype DENV2/S nextclade_subtype DENV3/I diff --git a/phylogenetic/config/config_dengue.yaml b/phylogenetic/config/config_dengue.yaml index f1edb220..243217fb 100644 --- a/phylogenetic/config/config_dengue.yaml +++ b/phylogenetic/config/config_dengue.yaml @@ -4,7 +4,9 @@ display_strain_field: "strain" filter: exclude: "config/exclude.txt" group_by: "year region" - min_length: 5000 + min_length: + genome: 5000 + E: 1000 sequences_per_group: all: '10' denv1: '36' @@ -15,11 +17,11 @@ filter: traits: sampling_bias_correction: '3' traits_columns: - all: 'region nextclade_subtype' - denv1: 'country region nextclade_subtype' - denv2: 'country region nextclade_subtype' - denv3: 'country region nextclade_subtype' - denv4: 'country region nextclade_subtype' + all: 'region ncbi_serotype nextclade_subtype' + denv1: 'country region ncbi_serotype nextclade_subtype' + denv2: 'country region ncbi_serotype nextclade_subtype' + denv3: 'country region ncbi_serotype nextclade_subtype' + denv4: 'country region ncbi_serotype nextclade_subtype' clades: clade_definitions: diff --git a/phylogenetic/rules/annotate_phylogeny.smk b/phylogenetic/rules/annotate_phylogeny.smk index 0b2131bb..551c698a 100644 --- a/phylogenetic/rules/annotate_phylogeny.smk +++ b/phylogenetic/rules/annotate_phylogeny.smk @@ -44,7 +44,7 @@ rule translate: input: tree = "results/{gene}/tree_{serotype}.nwk", node_data = "results/{gene}/nt-muts_{serotype}.json", - reference = "config/reference_{serotype}_genome.gb" + reference = lambda wildcard: "config/reference_{serotype}_genome.gb" if wildcard.gene in ['genome'] else "results/config/reference_{serotype}_{gene}.gb" output: node_data = "results/{gene}/aa-muts_{serotype}.json" shell: @@ -85,12 +85,12 @@ rule traits: rule clades: """Annotating serotypes / genotypes""" input: - tree = "results/{gene}/tree_{serotype}.nwk", - nt_muts = "results/{gene}/nt-muts_{serotype}.json", - aa_muts = "results/{gene}/aa-muts_{serotype}.json", + tree = "results/genome/tree_{serotype}.nwk", + nt_muts = "results/genome/nt-muts_{serotype}.json", + aa_muts = "results/genome/aa-muts_{serotype}.json", clade_defs = lambda wildcards: config['clades']['clade_definitions'][wildcards.serotype], output: - clades = "results/{gene}/clades_{serotype}.json" + clades = "results/genome/clades_{serotype}.json" shell: """ augur clades \ diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk index f7d2ced2..7e218ed3 100644 --- a/phylogenetic/rules/export.smk +++ b/phylogenetic/rules/export.smk @@ -42,7 +42,7 @@ rule prepare_auspice_config: output: auspice_config="results/config/{gene}/auspice_config_{serotype}.json", params: - replace_clade_key="clade_membership", + replace_clade_key=lambda wildcard: r"clade_membership" if wildcard.gene in ['genome'] else r"nextclade_subtype", replace_clade_title=lambda wildcard: r"Serotype" if wildcard.serotype in ['all'] else r"DENV genotype", run: data = { @@ -81,6 +81,11 @@ rule prepare_auspice_config: "key": "nextclade_subtype", "title": "Nextclade genotype", "type": "categorical" + }, + { + "key": "ncbi_serotype", + "title": "NCBI serotype", + "type": "categorical" } ], "geo_resolutions": [ @@ -89,8 +94,7 @@ rule prepare_auspice_config: ], "display_defaults": { "map_triplicate": True, - "color_by": params.replace_clade_key, - "distance_measure": "div" + "color_by": params.replace_clade_key }, "filters": [ "country", @@ -113,7 +117,7 @@ rule export: metadata = "data/metadata_{serotype}.tsv", branch_lengths = "results/{gene}/branch-lengths_{serotype}.json", traits = "results/{gene}/traits_{serotype}.json", - clades = "results/{gene}/clades_{serotype}.json", + clades = lambda wildcard: "results/{gene}/clades_{serotype}.json" if wildcard.gene in ['genome'] else [], nt_muts = "results/{gene}/nt-muts_{serotype}.json", aa_muts = "results/{gene}/aa-muts_{serotype}.json", auspice_config = "results/config/{gene}/auspice_config_{serotype}.json", diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk index 6a0c2675..b9521a4c 100644 --- a/phylogenetic/rules/prepare_sequences.smk +++ b/phylogenetic/rules/prepare_sequences.smk @@ -52,7 +52,7 @@ rule filter: - excluding strains with missing region, country or date metadata """ input: - sequences = "data/sequences_{serotype}.fasta", + sequences = lambda wildcard: "data/sequences_{serotype}.fasta" if wildcard.gene in ['genome'] else "results/{gene}/sequences_{serotype}.fasta", metadata = "data/metadata_{serotype}.tsv", exclude = config["filter"]["exclude"], output: @@ -60,7 +60,7 @@ rule filter: params: group_by = config['filter']['group_by'], sequences_per_group = lambda wildcards: config['filter']['sequences_per_group'][wildcards.serotype], - min_length = config['filter']['min_length'], + min_length = lambda wildcard: config['filter']['min_length'][wildcard.gene], strain_id = config.get("strain_id_field", "strain"), shell: """ @@ -83,7 +83,7 @@ rule align: """ input: sequences = "results/{gene}/filtered_{serotype}.fasta", - reference = "config/reference_{serotype}_genome.gb" + reference = lambda wildcard: "config/reference_{serotype}_genome.gb" if wildcard.gene in ['genome'] else "results/config/reference_{serotype}_{gene}.gb" output: alignment = "results/{gene}/aligned_{serotype}.fasta" shell: