From a803dc460a0fb07459b25c0516f4fe054763041d Mon Sep 17 00:00:00 2001
From: verku
Date: Wed, 19 Jun 2024 17:07:47 +0200
Subject: [PATCH] Update configuration files for slurm executor plugin
 profiles

---
 config/slurm/README.md                        |  28 +++-
 .../slurm/profile/config_plugin_dardel.yaml   |   4 +-
 .../slurm/profile/config_plugin_rackham.yaml  |   2 +
 .../slurm/profile/README.md                   | 122 ++++++++++++++++++
 .../slurm/profile/config_plugin.yaml          |  53 --------
 .../slurm/profile/config_plugin_dardel.yaml   |  74 +++++++++++
 6 files changed, 222 insertions(+), 61 deletions(-)
 create mode 100644 utilities/mutational_load_snpeff/slurm/profile/README.md
 delete mode 100644 utilities/mutational_load_snpeff/slurm/profile/config_plugin.yaml
 create mode 100644 utilities/mutational_load_snpeff/slurm/profile/config_plugin_dardel.yaml

diff --git a/config/slurm/README.md b/config/slurm/README.md
index 697d7f7..04845f2 100644
--- a/config/slurm/README.md
+++ b/config/slurm/README.md
@@ -23,7 +23,14 @@ time, their compute resources can be updated in this file.
 > Note that the current configuration files were adjusted to the
 HPC clusters Rackham from UPPMAX and Dardel from PDC/KTH. Details
 on how to configure and run GenErode on Dardel are provided below.
-The configuration file for Snakemake version 7 was kept for comparison
+Memory requirements are specified in three places in these
+configuration files: 1) under `set-threads` (the number of threads
+Snakemake assigns to a rule), 2) under `set-resources` and therein
+under `mem_mb`, specifying the memory in megabytes (the number of
+threads multiplied by the available memory per thread), and 3) under
+`set-resources` and therein under `cpus_per_task` (the same number as
+specified under `set-threads`, required for correct memory assignment
+on Dardel). The configuration file for Snakemake version 7 was kept for comparison,
 which was also written for Rackham/UPPMAX.
 
 3) Start GenErode the following:
@@ -53,6 +60,10 @@ incomplete jobs and `-k` to keep going in case a job fails.
 module load PDC UPPMAX bioinfo-tools conda singularity tmux
 ```
 
+> Note that `tmux` is only available as a module on Dardel,
+but the equivalent tool `screen` is pre-installed and does
+not need to be loaded.
+
 2) After cloning the repository, change permissions for the
 Snakefile:
 
 ```
 chmod 755 Snakefile
 ```
@@ -73,12 +84,17 @@ to `slurm/config.yaml`. This file specifies compute resources
 for each rule or group jobs to be run on Dardel. Any rule or
 group job that is not listed under `set-threads` or
 `set-resources` uses default resources specified under
 `default-resources`. If
-any rule or group jobs fail due to too little memory or run
+any rule or group job fails due to too little memory or run
 time, their compute resources can be updated in this file.
 
-> Note that the current version of `config/slurm/profile/config_plugin_dardel.yaml`
-is still being tested. Threads are currently specified under
-`set-threads` and under `set-resources` as `cpus_per_task`.
+> Note that memory requirements are specified in three places in
+the configuration file: 1) under `set-threads` (the number of
+threads Snakemake assigns to a rule), 2) under `set-resources` and
+therein under `mem_mb`, specifying the memory in megabytes (the
+number of threads multiplied by the available memory per thread),
+and 3) under `set-resources` and therein under `cpus_per_task`
+(the same number as specified under `set-threads`, required for
+correct memory assignment on Dardel).
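+
+For example, a rule that should run with 16 threads on Dardel would
+be configured as follows (an illustrative sketch with a hypothetical
+rule name, assuming ca. 1 GB of RAM per thread):
+
+```
+set-threads:
+  some_rule: 16
+set-resources:
+  some_rule:
+    mem_mb: 16000
+    cpus_per_task: 16
+```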
 
 5) Start GenErode the following:
@@ -96,7 +112,7 @@ conda activate generode
 
 - Start the dry run:
 
 ```
-snakemake --profile slurm -np &> YYMMDD_dry.out
+snakemake --profile slurm -n &> YYMMDD_dry.out
 ```
 
 - Start the main run:
diff --git a/config/slurm/profile/config_plugin_dardel.yaml b/config/slurm/profile/config_plugin_dardel.yaml
index 7732b36..50504df 100644
--- a/config/slurm/profile/config_plugin_dardel.yaml
+++ b/config/slurm/profile/config_plugin_dardel.yaml
@@ -112,8 +112,10 @@ set-resources:
     cpus_per_task: 16
   fastqc_historical_raw:
     mem_mb: 16000
+    cpus_per_task: 16
   fastqc_modern_raw:
     mem_mb: 16000
+    cpus_per_task: 16
   fastp_historical:
     runtime: 600
     mem_mb: 32000
@@ -225,9 +227,7 @@ set-resources:
     cpus_per_task: 32
   sort_vcfs:
     runtime: 1440
-  sort_vcfs:
     mem_mb: 16000
-  sort_vcfs:
     cpus_per_task: 16
   sorted_bcf2vcf:
     runtime: 300
diff --git a/config/slurm/profile/config_plugin_rackham.yaml b/config/slurm/profile/config_plugin_rackham.yaml
index 0d931d1..2fae685 100644
--- a/config/slurm/profile/config_plugin_rackham.yaml
+++ b/config/slurm/profile/config_plugin_rackham.yaml
@@ -106,8 +106,10 @@ set-resources:
     cpus_per_task: 2
   fastqc_historical_raw:
     mem_mb: 12800
+    cpus_per_task: 2
   fastqc_modern_raw:
     mem_mb: 12800
+    cpus_per_task: 2
   fastp_historical:
     runtime: 600
     mem_mb: 32000
diff --git a/utilities/mutational_load_snpeff/slurm/profile/README.md b/utilities/mutational_load_snpeff/slurm/profile/README.md
new file mode 100644
index 0000000..bfcdfc5
--- /dev/null
+++ b/utilities/mutational_load_snpeff/slurm/profile/README.md
@@ -0,0 +1,122 @@
+# GenErode execution on SLURM clusters
+
+With the switch to Snakemake version 8, GenErode can be run
+as follows on SLURM clusters:
+
+1) Create the GenErode conda environment or update an earlier
+version. The latest conda environment contains the Snakemake
+executor plugin for slurm:
+
+```
+conda env create -f environment.yaml -n generode
+```
+
+2) Copy the example configuration file `slurm/profile/config_plugin_dardel.yaml`
+to `slurm/config.yaml`. This file specifies compute resources
+for each rule or group job. Any rule or group job that is
+not listed under `set-threads` or `set-resources` uses
+default resources specified under `default-resources`. If
+any rule or group job fails due to too little memory or run
+time, its compute resources can be updated in this file.
+
+> Note that the current configuration file was adjusted to the
+HPC cluster Dardel from PDC/KTH. Details on how to configure and
+run GenErode on Dardel are provided below. Memory requirements are
+specified in three places in the configuration file: 1) under
+`set-threads` (the number of threads Snakemake assigns to a rule),
+2) under `set-resources` and therein under `mem_mb`, specifying
+the memory in megabytes (the number of threads multiplied by the
+available memory per thread), and 3) under `set-resources` and
+therein under `cpus_per_task` (the same number as specified under
+`set-threads`, required for correct memory assignment on Dardel).
+
+3) Start GenErode as follows:
+
+- Open a tmux or screen session
+- Activate the GenErode conda environment
+- Start the dry run:
+
+```
+snakemake --profile slurm -n &> YYMMDD_dry.out
+```
+
+- Start the main run:
+
+```
+snakemake --profile slurm &> YYMMDD_main.out
+```
+
+> Useful flags for running the pipeline: `--ri` to re-run
+incomplete jobs and `-k` to keep going in case a job fails.
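+
+For example, to resume a run after failed jobs, both flags can be
+combined (the log file name is just a suggestion):
+
+```
+snakemake --profile slurm --ri -k &> YYMMDD_restart.out
+```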
+
+## Specific instructions for Dardel
+
+1) Load the following modules on Dardel:
+
+```
+module load PDC UPPMAX bioinfo-tools conda singularity tmux
+```
+
+> Note that `tmux` is only available as a module on Dardel,
+but the equivalent tool `screen` is pre-installed and does
+not need to be loaded.
+
+2) After cloning the repository, change permissions for the
+Snakefile:
+
+```
+chmod 755 Snakefile
+```
+
+3) Create the GenErode conda environment or update an earlier
+version (an update example is given at the end of this README).
+The latest conda environment contains the Snakemake executor
+plugin for slurm:
+
+```
+conda env create -f environment.yaml -n generode
+```
+
+4) Copy the configuration file `config/slurm/profile/config_plugin_dardel.yaml`
+to `slurm/config.yaml`. This file specifies compute resources
+for each rule or group job to be run on Dardel. Any rule or
+group job that is not listed under `set-threads` or `set-resources`
+uses default resources specified under `default-resources`. If
+any rule or group job fails due to too little memory or run
+time, its compute resources can be updated in this file.
+
+> Note that memory requirements are specified in three places in
+the configuration file: 1) under `set-threads` (the number of
+threads Snakemake assigns to a rule), 2) under `set-resources` and
+therein under `mem_mb`, specifying the memory in megabytes (the
+number of threads multiplied by the available memory per thread),
+and 3) under `set-resources` and therein under `cpus_per_task`
+(the same number as specified under `set-threads`, required for
+correct memory assignment on Dardel).
+
+5) Start GenErode as follows:
+
+- Open a tmux session (alternatively, you can use screen)
+
+- Activate the GenErode conda environment (create or update
+from `environment.yaml`), replacing the path to the location
+of the conda environment:
+
+```
+export CONDA_ENVS_PATH=/cfs/klemming/home/.../
+conda activate generode
+```
+
+- Start the dry run:
+
+```
+snakemake --profile slurm -n &> YYMMDD_dry.out
+```
+
+- Start the main run:
+
+```
+snakemake --profile slurm &> YYMMDD_main.out
+```
+
+> Useful flags for running the pipeline: `--ri` to re-run
+incomplete jobs and `-k` to keep going in case a job fails.
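+
+> If a `generode` environment from an earlier GenErode version
+already exists, it can be updated in place instead of being
+recreated, for example:
+
+```
+conda env update -f environment.yaml -n generode
+```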
diff --git a/utilities/mutational_load_snpeff/slurm/profile/config_plugin.yaml b/utilities/mutational_load_snpeff/slurm/profile/config_plugin.yaml
deleted file mode 100644
index c2e8b05..0000000
--- a/utilities/mutational_load_snpeff/slurm/profile/config_plugin.yaml
+++ /dev/null
@@ -1,53 +0,0 @@
-# Configuration file for slurm plugin (Snakemake >8.0.0) for Dardel cluster at PDC/KTH
-# snakemake CLI flags
-executor: slurm
-jobs: 100
-cores: 1
-printshellcmds: true
-software-deployment-method: apptainer
-
-# slurm resources
-## default-resources: applied to all jobs, overruled by resources defined below for jobs
-default-resources:
-  slurm_account: XXX-XX-XXX # update this to your slurm account
-  slurm_partition: shared # use Dardel’s shared partition
-  runtime: 120 # default runtime in minutes
-  mem_mb: 8000
-  nodes: 1 # one node on Dardel from the shared partition
-  ntasks: 1 # number of concurrent tasks / ranks
-  cpus_per_task: 8 # number of hyperthreads per task, corresponds to 1 GB RAM
-set-threads: # map rule names to threads
-  - extract_number_of_samples=16
-  - find_fixed_homozygote_alt_sites=32
-  - remove_fixed_homozygote_alt_sites_merged_vcf=32
-  - find_intron_intergenic_variants=16
-  - remove_sites_snpEff_vcf=32
-  - extract_high_impact_snps=16
-  - extract_moderate_impact_snps=16
-  - extract_low_impact_snps=16
-  - extract_synonymous_variant_snps=16
-  - total_load=8
-  - realised_load=8
-set-resources: # map rule names to resources in general
-  - extract_number_of_samples:mem_mb=16000
-  - extract_number_of_samples:runtime=30
-  - find_fixed_homozygote_alt_sites:mem_mb=32000
-  - find_fixed_homozygote_alt_sites:runtime=300
-  - remove_fixed_homozygote_alt_sites_merged_vcf:mem_mb=32000
-  - remove_fixed_homozygote_alt_sites_merged_vcf:runtime=300
-  - find_intron_intergenic_variants:mem_mb=16000
-  - find_intron_intergenic_variants:runtime=300
-  - remove_sites_snpEff_vcf:mem_mb=32000
-  - remove_sites_snpEff_vcf:runtime=300
-  - extract_high_impact_snps:mem_mb=16000
-  - extract_high_impact_snps:runtime=120
-  - extract_moderate_impact_snps:mem_mb=16000
-  - extract_moderate_impact_snps:runtime=120
-  - extract_low_impact_snps:mem_mb=16000
-  - extract_low_impact_snps:runtime=120
-  - extract_synonymous_variant_snps:mem_mb=16000
-  - extract_synonymous_variant_snps:runtime=120
-  - total_load:mem_mb=6400
-  - total_load:runtime=30
-  - realised_load:mem_mb=6400
-  - realised_load:runtime=30
\ No newline at end of file
diff --git a/utilities/mutational_load_snpeff/slurm/profile/config_plugin_dardel.yaml b/utilities/mutational_load_snpeff/slurm/profile/config_plugin_dardel.yaml
new file mode 100644
index 0000000..32cb4b9
--- /dev/null
+++ b/utilities/mutational_load_snpeff/slurm/profile/config_plugin_dardel.yaml
@@ -0,0 +1,74 @@
+# Configuration file for slurm plugin (Snakemake >8.0.0) for Dardel cluster at PDC/KTH
+# snakemake CLI flags
+executor: slurm
+jobs: 100
+printshellcmds: true
+software-deployment-method: apptainer
+
+# slurm resources
+## default-resources: applied to all jobs, overruled by the rule-specific resources defined below
+default-resources:
+  slurm_account: XXX-XX-XXX # update this to your slurm account
+  slurm_partition: shared # use Dardel’s shared partition
+  runtime: 120 # default runtime in minutes
+  mem_mb: 8000
+  nodes: 1 # one node on Dardel from the shared partition
+  ntasks: 1 # number of concurrent tasks / ranks
+  cpus_per_task: 8 # number of hyperthreads per task; each corresponds to ca. 1 GB RAM
+
+## map rule names to threads
+set-threads:
+  extract_number_of_samples: 16
+  find_fixed_homozygote_alt_sites: 32
+  remove_fixed_homozygote_alt_sites_merged_vcf: 32
+  find_intron_intergenic_variants: 16
+  remove_sites_snpEff_vcf: 32
+  extract_high_impact_snps: 16
+  extract_moderate_impact_snps: 16
+  extract_low_impact_snps: 16
+  extract_synonymous_variant_snps: 16
+  total_load: 8
+  realised_load: 8
+
+## map rule names to resources
+set-resources:
+  extract_number_of_samples:
+    mem_mb: 16000
+    runtime: 30
+    cpus_per_task: 16
+  find_fixed_homozygote_alt_sites:
+    mem_mb: 32000
+    runtime: 300
+    cpus_per_task: 32
+  remove_fixed_homozygote_alt_sites_merged_vcf:
+    mem_mb: 32000
+    runtime: 300
+    cpus_per_task: 32
+  find_intron_intergenic_variants:
+    mem_mb: 16000
+    runtime: 300
+    cpus_per_task: 16
+  remove_sites_snpEff_vcf:
+    mem_mb: 32000
+    runtime: 300
+    cpus_per_task: 32
+  extract_high_impact_snps:
+    mem_mb: 16000
+    cpus_per_task: 16
+  extract_moderate_impact_snps:
+    mem_mb: 16000
+    cpus_per_task: 16
+  extract_low_impact_snps:
+    mem_mb: 16000
+    cpus_per_task: 16
+  extract_synonymous_variant_snps:
+    mem_mb: 16000
+    cpus_per_task: 16
+  total_load:
+    mem_mb: 8000
+    runtime: 30
+    cpus_per_task: 8
+  realised_load:
+    mem_mb: 8000
+    runtime: 30
+    cpus_per_task: 8
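+
+## For reference, the slurm executor plugin translates these resources
+## into sbatch options, roughly: slurm_account -> --account,
+## slurm_partition -> --partition, runtime -> --time (in minutes),
+## mem_mb -> --mem, cpus_per_task -> --cpus-per-task.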