-
Notifications
You must be signed in to change notification settings - Fork 0
/
Snakefile
65 lines (53 loc) · 2.17 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
""" Process CITE-Seq data """
""" 1 - preprocess will plot important QC metrics for data filtration """
""" 2 - integrate will find anchors and integrate all samples into an integrated seurat object """
""" 3 - cluster will cluster individual samples plus integrated object """
""" 4 - differential will test for DE genes in seurat objects """
import os
import sys
import time
import glob
configfile: "config/config.yaml"
SAMPLES = [ os.path.basename(i) for i in glob.glob("data/raw/*")]
def message(m):
sys.stderr.write("|--- {} \n".format(m))
for i in SAMPLES:
message("Samples to process: {}".format(i))
p = config["project_name"]
if not os.path.isdir("data/reports"):
os.mkdir("data/reports")
if not os.path.isdir("data/rda"):
os.mkdir("data/rda")
# snakemake -j 8 --use-conda --cluster-config cluster.yaml --profile slurm
rule all:
input:
# 1 - preprocess
"data/reports/1-preprocess.html",
"data/rda/preprocess.{project_name}.rds".format(project_name = p),
# 2 - integrate
# "data/reports/2-integrate.html",
# "data/rda/integrate.{project_name}.rds".format(project_name = p),
# 3 - cluster
# "data/reports/3-cluster.html",
# "data/rda/cluster.{project_name}.rds".format(project_name = p),
# 4 - differential
# "data/reports/4-differential.html"
rule preprocess:
input:
rmd = "scripts/1-preprocess.Rmd",
samples = expand("data/raw/{sample}/outs/filtered_feature_bc_matrix", sample = SAMPLES)
output:
report = "data/reports/1-preprocess.html",
rds = "data/rda/preprocess.{project_name}.rds".format(project_name = p)
conda:
"envs/seurat.yaml"
log:
"logs/1-preprocess.log"
shell:
"""
Rscript -e 'rmarkdown::render( here::here("{input.rmd}"), output_file = here::here("{output.report}"), knit_root_dir = here::here(), envir = new.env(), params = list(input_samples = "{input.samples}", output_rds = "{output.rds}" ))' > {log} 2>&1
"""
# knit rmarkdown report with multiple outputs workaround: https://github.com/snakemake/snakemake/issues/178
# how to use rmd params to specify input/output files:
# https://stackoverflow.com/questions/32479130/passing-parameters-to-r-markdown
# here::here() will turn a relative path absolute, and is required for rmarkdown I/O