diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index ef21205..0000000 --- a/Dockerfile +++ /dev/null @@ -1,60 +0,0 @@ -FROM condaforge/mambaforge:latest -LABEL io.github.snakemake.containerized="true" -LABEL io.github.snakemake.conda_env_hash="fd0eeef546ed00fa95ff85a8baf143dfccd52a6183b57e9bb8e972bf196948d5" - -# Step 1: Retrieve conda environments - -# Conda environment: -# source: workflow/envs/R.yaml -# prefix: /conda-envs/ee4bfa88159ef77575445596ec2221ce -# name: R -# channels: -# - conda-forge -# - r -# - defaults -# dependencies: -# #- xorg-libx11 -# #- xorg-libxau -# #- r::r-ggplot2 -# - r-base>=4.0 -# - r-essentials -# - r-cairo -# - r-data.table -# - r-cowplot -# - r-argparse>=2.1.2 -# - r-glue -# - r-r.utils -# - r-rcolorbrewer -# - r-scales -# - r-tidyverse>=1.3.0 -RUN mkdir -p /conda-envs/ee4bfa88159ef77575445596ec2221ce -COPY workflow/envs/R.yaml /conda-envs/ee4bfa88159ef77575445596ec2221ce/environment.yaml - -# Conda environment: -# source: workflow/envs/env.yaml -# prefix: /conda-envs/5ea0207c595d8a051ab13a13766b14f6 -# name: python_and_cli_env -# channels: -# - conda-forge -# - bioconda -# - defaults -# dependencies: -# - numpy<1.20 -# - numba -# - cooler<=0.8.11 -# - pandas<=1.5 -# - minimap2==2.18 -# - bioconda::bedtools -# - bioconda::samtools>=1.14 -# - bioconda::htslib>=1.14 -# - bioconda::pysam>=0.15.0 -# - bioconda::bwa -# - pigz -RUN mkdir -p /conda-envs/5ea0207c595d8a051ab13a13766b14f6 -COPY workflow/envs/env.yaml /conda-envs/5ea0207c595d8a051ab13a13766b14f6/environment.yaml - -# Step 2: Generate conda environments - -RUN mamba env create --prefix /conda-envs/ee4bfa88159ef77575445596ec2221ce --file /conda-envs/ee4bfa88159ef77575445596ec2221ce/environment.yaml && \ - mamba env create --prefix /conda-envs/5ea0207c595d8a051ab13a13766b14f6 --file /conda-envs/5ea0207c595d8a051ab13a13766b14f6/environment.yaml && \ - mamba clean --all -y diff --git a/README.md b/README.md index 0e92443..3a46e03 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ To install you can follow the directions on the [usage page](https://snakemake.g You will need a current version of `snakemake` to run this workflow. To get `snakemake` please follow the install [instructions](https://snakemake.readthedocs.io/en/stable/getting_started/installation.html) on their website, but in brief once `conda` and `mamba` are installed you can install `snakemake` with: ``` -mamba create -n snakemake -c conda-forge -c bioconda snakemake +mamba create -n snakemake -c conda-forge -c bioconda 'snakemake>=8' ``` Afterwards you can activate the `conda` environment and download the repository. And all additional dependencies will be handled by `snakemake`. @@ -31,20 +31,20 @@ Choose a sample identifier for your run e.g. `chr8` and a fasta file on which yo Once this is done and you have activated your `conda` env with `snakemake` you can run the pipeline like so: ``` -snakemake --use-conda --cores 24 +snakemake --cores 24 ``` Or do a dry run of the pipeline: ``` -snakemake --use-conda --cores 24 -n +snakemake --cores 24 -n ``` All parameters are described in `config/README.md` and you can modify any of them by modifying `config/config.yaml`. You can also change the configuration via the command line. For example, to change the `sample` identifier and `fasta` options do: ``` -snakemake --use-conda --cores 24 --config sample=test2 fasta=/some/fasta/path.fa +snakemake --cores 24 --config sample=test2 fasta=/some/fasta/path.fa ``` Please try the test case with the default configuration file before submitting issues. @@ -59,7 +59,7 @@ The file `results/{sample}.{\d+}.{\d+}.bed` will contain all the alignments iden To make pdfs and pngs for a particular set of regions just add `make_figures` to your command. This is generally appropriate for comparing up to ~5 regions totaling at most ~40 Mbp. ``` -snakemake --use-conda --cores 24 make_figures +snakemake --cores 24 make_figures ``` This will make an output directory under `results/{sample}.{\d+}.{\d+}_figures` with a variety of dot plots in `pdf` and `png` format. @@ -71,7 +71,7 @@ If you see `tri.TRUE` in the output pdf/png it means that the dot plot is rotate Making an interactive whole genome visualization requires the use of the program [HiGlass](https://higlass.io/) and a web browser. However, this pipeline will make the necessary input files with the following command: ``` -snakemake --use-conda --cores 24 cooler +snakemake --cores 24 cooler ``` To view locally, use `higlass-manage`: @@ -89,7 +89,7 @@ To create a high-resolution interactive visualization where the coloring is proportionally to the number of reads mapped to each bin, use the following command: ``` -snakemake --use-conda --cores 24 cooler_density --config window=32 cooler_window=100 +snakemake --cores 24 cooler_density --config window=32 cooler_window=100 ``` ## Arabidopsis: quick start, case example, and benchmark @@ -105,7 +105,7 @@ wget https://github.com/schatzlab/Col-CEN/raw/main/v1.2/Col-CEN_v1.2.fasta.gz \ Using 8 cores on a laptop with 32 GB of ram we ran StainedGlass using the following commands: ```shell -time snakemake --cores 8 --config sample=arabidopsis fasta=Col-CEN_v1.2.fasta --use-conda +time snakemake --cores 8 --config sample=arabidopsis fasta=Col-CEN_v1.2.fasta ``` This command generated 41,036,963 self-self pairwise alignments within the assembly, 16,699,976 of which passed filters for downstream analysis. @@ -113,7 +113,7 @@ This command generated 41,036,963 self-self pairwise alignments within the assem Then to generate the cooler files that can be loaded in HiGlass we ran the following command with the already computed alignments: ```shell -time snakemake --cores 8 --config sample=arabidopsis fasta=Col-CEN_v1.2.fasta --use-conda cooler +time snakemake --cores 8 --config sample=arabidopsis fasta=Col-CEN_v1.2.fasta cooler ``` The results can be viewed at [resgen.io/paper-data/Naish](https://resgen.io/paper-data/Naish%202021%20-%20Arabidopsis/views/EYd0Kq5XTY6jhKpCK08Jjg/), and we include a static view of the centromeres here: diff --git a/workflow/Snakefile b/workflow/Snakefile index ad167b1..0b25538 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -4,7 +4,7 @@ import sys from snakemake.utils import min_version -min_version("6.0") +min_version("8.0") bold = "\033[1m" green = "\033[92m" @@ -18,10 +18,12 @@ msg = f"""{green}{bold}Thanks for using StainedGlass and please remember to cite """ sys.stderr.write(msg) -SDIR = os.path.realpath(os.path.dirname(srcdir("Snakefile"))) shell.prefix(f"set -eo pipefail; ") +# container: "docker://continuumio/miniconda3" + + configfile: "config/config.yaml" diff --git a/workflow/profiles/default/config.yaml b/workflow/profiles/default/config.yaml new file mode 100644 index 0000000..3edfd55 --- /dev/null +++ b/workflow/profiles/default/config.yaml @@ -0,0 +1,10 @@ +default-resources: + - mem_mb=16096 + - runtime=120 +rerun-incomplete: True +rerun-triggers: mtime +use-conda: True +#use-singularity: True +show-failed-logs: True +conda-frontend: conda +cores: 4