Skip to content

Commit

Permalink
Merge pull request #24 from nextstrain/more-example-data-23
Browse files Browse the repository at this point in the history
Add Nextclade and geo rules to example-data; disable "prM-E" build in phylo CI
  • Loading branch information
genehack authored Dec 5, 2024
2 parents 0972a30 + d94270f commit 32afa98
Show file tree
Hide file tree
Showing 9 changed files with 44,776 additions and 45 deletions.
34 changes: 30 additions & 4 deletions ingest/build-configs/ci/copy_example_data.smk
Original file line number Diff line number Diff line change
@@ -1,12 +1,38 @@
rule copy_example_data:
rule copy_example_ncbi_data:
input:
ncbi_dataset="example-data/ncbi_dataset.zip"
output:
ncbi_dataset=temp("data/ncbi_dataset.zip")
shell:
"""
r"""
cp -f {input.ncbi_dataset} {output.ncbi_dataset}
"""

# force this rule over NCBI data fetch
ruleorder: copy_example_data > fetch_ncbi_dataset_package
ruleorder: copy_example_ncbi_data > fetch_ncbi_dataset_package


# Name of the Nextclade dataset, read from the workflow config; it is used as
# the file name of the dataset archive under data/nextclade_data/.
DATASET_NAME = config["nextclade"]["dataset_name"]


# Copy the Nextclade dataset archive bundled in example-data/ into data/.
rule copy_example_nextclade_data:
    input:
        nextclade_dataset="example-data/nextclade_dataset.zip"
    output:
        # f-string so that DATASET_NAME is interpolated from the config value.
        # Without the `f` prefix Snakemake treats {DATASET_NAME} as a wildcard,
        # the variable above goes unused, and this rule would claim every
        # data/nextclade_data/*.zip target.
        nextclade_dataset=temp(f"data/nextclade_data/{DATASET_NAME}.zip")
    shell:
        r"""
        cp -f {input.nextclade_dataset} {output.nextclade_dataset}
        """

# force this rule over Nextclade data fetch
ruleorder: copy_example_nextclade_data > get_nextclade_dataset


# Copy the general geolocation rules bundled in example-data/ into data/.
# Unlike the other example-data copies in this file, the output is not wrapped
# in temp(), so the copied TSV is kept after the workflow finishes.
rule copy_example_geolocation_rules:
    input:
        general_geolocation_rules="example-data/general-geolocation-rules.tsv",
    output:
        general_geolocation_rules="data/general-geolocation-rules.tsv",
    shell:
        r"""
        cp -f {input.general_geolocation_rules} {output.general_geolocation_rules}
        """

# force this rule over downloading geolocation rules
ruleorder: copy_example_geolocation_rules > fetch_general_geolocation_rules
44,702 changes: 44,702 additions & 0 deletions ingest/example-data/general-geolocation-rules.tsv

Large diffs are not rendered by default.

Binary file added ingest/example-data/nextclade_dataset.zip
Binary file not shown.
6 changes: 2 additions & 4 deletions phylogenetic/Snakefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
configfile: "defaults/config.yaml"

gene = ["genome", "prM-E"]

rule all:
input:
auspice_json=expand("auspice/yellow-fever-virus_{gene}.json", gene=gene),
tip_frequencies_json=expand("auspice/yellow-fever-virus_{gene}_tip-frequencies.json", gene=gene),
auspice_json=expand("auspice/yellow-fever-virus_{build}.json", build=config["builds"]),
tip_frequencies_json=expand("auspice/yellow-fever-virus_{build}_tip-frequencies.json", build=config["builds"]),


include: "rules/prepare_sequences.smk"
Expand Down
2 changes: 2 additions & 0 deletions phylogenetic/build-configs/ci/config.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
# Builds to produce; the Snakefile's `all` rule expands its Auspice JSON
# targets over this list.
builds:
  - 'genome'
# NOTE(review): presumably extra rule files pulled in by the workflow for this
# configuration - confirm against the Snakefile's include logic.
custom_rules:
  - 'build-configs/ci/copy_example_data.smk'
3 changes: 3 additions & 0 deletions phylogenetic/defaults/config.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
builds:
- "genome"
- "prM-E"
strain_id_field: "accession"
files:
description: "defaults/description.md"
Expand Down
30 changes: 15 additions & 15 deletions phylogenetic/rules/annotate_phylogeny.smk
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@ phylogenetic tree.
rule ancestral:
"""Reconstructing ancestral sequences and mutations"""
input:
tree = "results/{gene}/tree.nwk",
alignment = "results/{gene}/aligned_and_filtered.fasta"
tree = "results/{build}/tree.nwk",
alignment = "results/{build}/aligned_and_filtered.fasta"
output:
node_data = "results/{gene}/nt_muts.json"
node_data = "results/{build}/nt_muts.json"
params:
inference = config["ancestral"]["inference"]
log:
"logs/{gene}/ancestral.txt",
"logs/{build}/ancestral.txt",
benchmark:
"benchmarks/{gene}/ancestral.txt"
"benchmarks/{build}/ancestral.txt"
shell:
r"""
augur ancestral \
Expand All @@ -29,15 +29,15 @@ rule ancestral:
rule translate:
"""Translating amino acid sequences"""
input:
tree = "results/{gene}/tree.nwk",
node_data = "results/{gene}/nt_muts.json",
genemap = "defaults/genemap_{gene}.gff"
tree = "results/{build}/tree.nwk",
node_data = "results/{build}/nt_muts.json",
genemap = "defaults/genemap_{build}.gff"
output:
node_data = "results/{gene}/aa_muts.json"
node_data = "results/{build}/aa_muts.json"
log:
"logs/{gene}/translate.txt",
"logs/{build}/translate.txt",
benchmark:
"benchmarks/{gene}/translate.txt"
"benchmarks/{build}/translate.txt"
shell:
r"""
augur translate \
Expand All @@ -52,17 +52,17 @@ rule translate:
rule traits:
"""Inferring ancestral traits for {params.columns!s}"""
input:
tree = "results/{gene}/tree.nwk",
tree = "results/{build}/tree.nwk",
metadata = "data/metadata.tsv",
output:
node_data = "results/{gene}/traits.json",
node_data = "results/{build}/traits.json",
params:
columns = config["traits"]["columns"],
strain_id = config["strain_id_field"],
log:
"logs/{gene}/traits.txt",
"logs/{build}/traits.txt",
benchmark:
"benchmarks/{gene}/traits.txt"
"benchmarks/{build}/traits.txt"
shell:
r"""
augur traits \
Expand Down
22 changes: 11 additions & 11 deletions phylogenetic/rules/construct_phylogeny.smk
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ This part of the workflow constructs the phylogenetic tree.
rule tree:
"""Building tree"""
input:
alignment = "results/{gene}/aligned_and_filtered.fasta"
alignment = "results/{build}/aligned_and_filtered.fasta"
output:
tree = "results/{gene}/tree_raw.nwk"
tree = "results/{build}/tree_raw.nwk"
log:
"logs/{gene}/tree.txt",
"logs/{build}/tree.txt",
benchmark:
"benchmarks/{gene}/tree.txt"
"benchmarks/{build}/tree.txt"
shell:
r"""
augur tree \
Expand All @@ -28,24 +28,24 @@ rule refine:
- filter tips more than {params.clock_filter_iqd} IQDs from clock expectation
"""
input:
tree = "results/{gene}/tree_raw.nwk",
alignment = "results/{gene}/aligned_and_filtered.fasta",
tree = "results/{build}/tree_raw.nwk",
alignment = "results/{build}/aligned_and_filtered.fasta",
metadata = "data/metadata.tsv"
output:
tree = "results/{gene}/tree.nwk",
node_data = "results/{gene}/branch_lengths.json"
tree = "results/{build}/tree.nwk",
node_data = "results/{build}/branch_lengths.json"
params:
strain_id = config["strain_id_field"],
timetree = lambda w: "--timetree" if w.gene == "genome" else "",
timetree = lambda w: "--timetree" if w.build == "genome" else "",
clock_rate = config["refine"]["clock_rate"],
clock_std_dev = config["refine"]["clock_std_dev"],
coalescent = config["refine"]["coalescent"],
date_inference = config["refine"]["date_inference"],
clock_filter_iqd = config["refine"]["clock_filter_iqd"],
log:
"logs/{gene}/refine.txt",
"logs/{build}/refine.txt",
benchmark:
"benchmarks/{gene}/refine.txt"
"benchmarks/{build}/refine.txt"
shell:
r"""
augur refine \
Expand Down
22 changes: 11 additions & 11 deletions phylogenetic/rules/export.smk
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,24 @@ rule colors:
rule export:
"""Exporting data files for auspice"""
input:
tree = "results/{gene}/tree.nwk",
tree = "results/{build}/tree.nwk",
metadata = "data/metadata.tsv",
branch_lengths = "results/{gene}/branch_lengths.json",
nt_muts = "results/{gene}/nt_muts.json",
aa_muts = "results/{gene}/aa_muts.json",
traits = "results/{gene}/traits.json",
branch_lengths = "results/{build}/branch_lengths.json",
nt_muts = "results/{build}/nt_muts.json",
aa_muts = "results/{build}/aa_muts.json",
traits = "results/{build}/traits.json",
colors = "data/colors.tsv",
auspice_config = lambda w: config["files"][w.gene]["auspice_config"],
auspice_config = lambda w: config["files"][w.build]["auspice_config"],
description=config["files"]["description"],
output:
auspice_json = "auspice/yellow-fever-virus_{gene}.json"
auspice_json = "auspice/yellow-fever-virus_{build}.json"
params:
metadata_columns = config["export"]["metadata_columns"],
strain_id = config["strain_id_field"],
log:
"logs/{gene}/export.txt",
"logs/{build}/export.txt",
benchmark:
"benchmarks/{gene}/export.txt"
"benchmarks/{build}/export.txt"
shell:
r"""
augur export v2 \
Expand All @@ -63,10 +63,10 @@ rule tip_frequencies:
Estimating KDE frequencies for tips
"""
input:
tree = "results/{gene}/tree.nwk",
tree = "results/{build}/tree.nwk",
metadata = "data/metadata.tsv"
output:
tip_freq = "auspice/yellow-fever-virus_{gene}_tip-frequencies.json"
tip_freq = "auspice/yellow-fever-virus_{build}_tip-frequencies.json"
params:
strain_id = config["strain_id_field"],
min_date = config["tip_frequencies"]["min_date"],
Expand Down

0 comments on commit 32afa98

Please sign in to comment.