From 893470f6919db3abe2c397d91b182c67ba458928 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Tue, 2 Jul 2024 17:10:56 -0700 Subject: [PATCH] Remove rebuild_country.yml + nextstrain_profiles/nextstrain-country As discussed in https://github.com/nextstrain/ncov/issues/1120, this profile does not work as expected and is not actively being used. --- .github/workflows/rebuild-country.yml | 55 ----- .../nextstrain-country/builds.yaml | 208 ------------------ .../nextstrain-country/config.yaml | 12 - .../nextstrain_description.md | 12 - 4 files changed, 287 deletions(-) delete mode 100644 .github/workflows/rebuild-country.yml delete mode 100644 nextstrain_profiles/nextstrain-country/builds.yaml delete mode 100644 nextstrain_profiles/nextstrain-country/config.yaml delete mode 100644 nextstrain_profiles/nextstrain-country/nextstrain_description.md diff --git a/.github/workflows/rebuild-country.yml b/.github/workflows/rebuild-country.yml deleted file mode 100644 index e99f84aa7..000000000 --- a/.github/workflows/rebuild-country.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: Rebuild country-specific phylogenetic dataset - -on: - # This workflow can be triggered from repository_dispatch events, - # for instance, after the appropriate preprocessing actions have completed - repository_dispatch: - types: - - rebuild - - rebuild-country - # Manually triggered using GitHub's UI - workflow_dispatch: - inputs: - trial_name: - description: "Short name for this trial build, for prefixing the uploaded data and results files. WARNING: without this we will overwrite files in s3://nextstrain-ncov-private and the trees on nextstrain.org/ncov/gisaid..." - required: false - image: - description: 'Specific container image to use for build (will override the default of "nextstrain build")' - required: false - -jobs: - nextstrain-country: - permissions: - id-token: write - uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master - secrets: inherit - with: - runtime: aws-batch - env: | - TRIAL_NAME: ${{ github.event.inputs.trial_name }} - NEXTSTRAIN_DOCKER_IMAGE: ${{ github.event.inputs.image }} - run: | - set -x - - declare -a config - config+=(build_date=\'$(date +'%Y-%m-%d')\') - if [[ "$TRIAL_NAME" ]]; then - config+=( - S3_DST_BUCKET=nextstrain-ncov-private/trial/"$TRIAL_NAME" - deploy_url=s3://nextstrain-staging/ - auspice_json_prefix=ncov_gisaid_trial_"$TRIAL_NAME" - ) - else - config+=(slack_token=$SLACK_TOKEN) - fi - - nextstrain build \ - --detach \ - --cpus 72 \ - --memory 140GiB \ - . \ - deploy \ - upload \ - --config "${config[@]}" \ - --profile nextstrain_profiles/nextstrain-country \ - --set-threads tree=8 diff --git a/nextstrain_profiles/nextstrain-country/builds.yaml b/nextstrain_profiles/nextstrain-country/builds.yaml deleted file mode 100644 index 529f201f3..000000000 --- a/nextstrain_profiles/nextstrain-country/builds.yaml +++ /dev/null @@ -1,208 +0,0 @@ -auspice_json_prefix: ncov_country - -# Define custom rules for pre- or post-standard workflow processing of data. -custom_rules: - - workflow/snakemake_rules/export_for_nextstrain.smk - -# These parameters are only used by the `export_for_nextstrain` rule and shouldn't need to be modified. -# To modify the s3 _source_ bucket, specify this directly in the `inputs` section of the config. -# P.S. These are intentionally set as top-level keys as this allows command-line overrides. -S3_DST_BUCKET: "nextstrain-ncov-private" -S3_DST_COMPRESSION: "xz" -S3_DST_ORIGINS: ["gisaid"] - -# Deploy and Slack options are related to Nextstrain live builds and don't need to be modified for local builds -deploy_url: s3://nextstrain-data -slack_token: ~ -slack_channel: "#ncov-gisaid-updates" - -genes: ["ORF1a", "ORF1b", "S", "ORF3a", "E", "M", "ORF6", "ORF7a", "ORF7b", "ORF8", "N", "ORF9b"] -use_nextalign: true -include_hcov19_prefix: True - -files: - description: "nextstrain_profiles/nextstrain-country/nextstrain_description.md" - -inputs: - - name: gisaid - metadata: "s3://nextstrain-ncov-private/metadata.tsv.zst" - aligned: "s3://nextstrain-ncov-private/aligned.fasta.zst" - skip_sanitize_metadata: true - -# Define locations for which builds should be created. -# For each build we specify a subsampling scheme via an explicit key. -# These subsampling schemes are defined at the bottom of this file. -# (They override the defaults) -# North America and Oceania are subsampled at the "division" level -# Africa, Asia, Europe and South America are subsampled at the "country" level -# -# Auspice config is specified in rule auspice_config in export_for_nextstrain.smk -builds: - nextstrain_country_1m: - subsampling_scheme: nextstrain_country_1m - title: Genomic epidemiology of SARS-CoV-2 with country-focused subsampling over the past month - country: India - nextstrain_country_2m: - subsampling_scheme: nextstrain_country_2m - title: Genomic epidemiology of SARS-CoV-2 with country-focused subsampling over the past 2 months - country: India - nextstrain_country_6m: - subsampling_scheme: nextstrain_country_6m - title: Genomic epidemiology of SARS-CoV-2 with country-focused subsampling over the past 6 months - country: India - nextstrain_country_all-time: - subsampling_scheme: nextstrain_country_all-time - title: Genomic epidemiology of SARS-CoV-2 with country-focused subsampling since pandemic start - country: India - -# remove sequences without division label in US -filter: - exclude_where: "division='USA'" - -subsampling: - - # Custom subsampling logic for regions over 1m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_country_1m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 1M" - exclude: "--exclude-where 'country!={country}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 1M" - exclude: "--exclude-where 'country={country}'" - # Recent focal samples for region - focal_recent: - group_by: "division week" - max_sequences: 2560 - min_date: "--min-date 1M" - exclude: "--exclude-where 'country!={country}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 1M" - exclude: "--exclude-where 'country={country}'" - - # Custom subsampling logic for regions over 2m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_country_2m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 2M" - exclude: "--exclude-where 'country!={country}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 2M" - exclude: "--exclude-where 'country={country}'" - # Recent focal samples for region - focal_recent: - group_by: "division week" - max_sequences: 2560 - min_date: "--min-date 2M" - exclude: "--exclude-where 'country!={country}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 2M" - exclude: "--exclude-where 'country={country}'" - - # Custom subsampling logic for regions over 6m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_country_6m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 6M" - exclude: "--exclude-where 'country!={country}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 6M" - exclude: "--exclude-where 'country={country}'" - # Recent focal samples for region - focal_recent: - group_by: "division year month" - max_sequences: 2560 - min_date: "--min-date 6M" - exclude: "--exclude-where 'country!={country}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country year month" - max_sequences: 640 - min_date: "--min-date 6M" - exclude: "--exclude-where 'country={country}'" - - # Custom subsampling logic for regions over all-time - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of focal to context - nextstrain_country_all-time: - # Focal samples for country - focal: - group_by: "division year month" - max_sequences: 640 - exclude: "--exclude-where 'country!={country}'" - # Contextual samples from the rest of the world - context: - group_by: "country year month" - max_sequences: 160 - exclude: "--exclude-where 'country={country}'" - -# if different traits should be reconstructed for some builds, specify here -# otherwise the default trait config in defaults/parameters.yaml will used -traits: - nextstrain_country_1m: - sampling_bias_correction: 2.5 - columns: ["country"] - nextstrain_country_2m: - sampling_bias_correction: 2.5 - columns: ["country"] - nextstrain_country_6m: - sampling_bias_correction: 2.5 - columns: ["country"] - nextstrain_country_all-time: - sampling_bias_correction: 2.5 - columns: ["country"] - -# Define frequencies parameters -# Target frequencies to "1m", "2m", "6m" and "all-time" builds -# narrow_bandwidth = 0.019 or 7 days for "1m" and "2m" -# narrow_bandwidth = 0.038 or 14 days for "6m" and "all-time" -frequencies: - nextstrain_country_1m: - min_date: "1M" - narrow_bandwidth: 0.019 - nextstrain_country_2m: - min_date: "2M" - narrow_bandwidth: 0.019 - recent_days_to_censor: 7 - nextstrain_country_6m: - min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - nextstrain_country_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 diff --git a/nextstrain_profiles/nextstrain-country/config.yaml b/nextstrain_profiles/nextstrain-country/config.yaml deleted file mode 100644 index 11365d5ed..000000000 --- a/nextstrain_profiles/nextstrain-country/config.yaml +++ /dev/null @@ -1,12 +0,0 @@ -configfile: - - defaults/parameters.yaml - - nextstrain_profiles/nextstrain-country/builds.yaml - -cores: 8 -keep-going: False -printshellcmds: True -show-failed-logs: True -restart-times: 2 -reason: True -stats: stats.json -set-threads: tree=4 diff --git a/nextstrain_profiles/nextstrain-country/nextstrain_description.md b/nextstrain_profiles/nextstrain-country/nextstrain_description.md deleted file mode 100644 index ade4f3875..000000000 --- a/nextstrain_profiles/nextstrain-country/nextstrain_description.md +++ /dev/null @@ -1,12 +0,0 @@ -Compiled Nextstrain SARS-CoV-2 resources are available at [nextstrain.org/sars-cov-2](https://nextstrain.org/sars-cov-2/). Follow [@nextstrain](https://twitter.com/nextstrain) for updates. - -This phylogeny shows evolutionary relationships of SARS-CoV-2 viruses from the ongoing COVID-19 pandemic. Although the genetic relationships among sampled viruses are generally quite clear, there is considerable uncertainty surrounding estimates of specific transmission dates and in reconstruction of geographic spread. Please be aware that specific inferred geographic transmission patterns and temporal estimates are only a hypothesis. - -Site numbering and genome structure uses [Wuhan-Hu-1/2019](https://www.ncbi.nlm.nih.gov/nuccore/MN908947) as reference. The phylogeny is rooted relative to early samples from Wuhan. Temporal resolution assumes a nucleotide substitution rate of 8 × 10^-4 subs per site per year. Mutational fitness is calculated using results from [Obermeyer et al (under review)](https://www.medrxiv.org/content/10.1101/2021.09.07.21263228v1). Full details on bioinformatic processing can be found [here](https://github.com/nextstrain/ncov). - -We gratefully acknowledge the authors, originating and submitting laboratories of the genetic sequences and metadata made available through [GISAID](https://gisaid.org) on which this research is based. An attribution table is available by clicking on "Download Data" at the bottom of the page and then clicking on "Acknowledgments" in the resulting dialog box. - -At the specific request of GISAID, we: - - maintain the prefix `hCoV-19/` in the names of viral isolates - - disable download of full metadata TSV and provide instead an acknowledgments TSV in the "Download Data" link at the bottom of the page - - refrain from sharing alignments or other intermediate files computed in our pipeline