-
Notifications
You must be signed in to change notification settings - Fork 0
/
Snakefile
44 lines (33 loc) · 1.38 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import pandas as pd
import os
import glob
# Load the workflow settings; Snakemake exposes them as the global `config`.
configfile: 'config.yaml'

# Directory settings from the `path` section of the config.
# PATH_FASTQ is globbed for *.fastq.gz inputs and PATH_FUSION holds the final
# targets of `rule all`; the remaining paths are not used in this file and are
# presumably consumed by the included rules -- confirm there.
PATH_FASTQ, PATH_FUSION, PATH_QC, PATH_LOG, PATH_BAM = (
    config["path"][key] for key in ("fastq", "fusion", "qc", "log", "bam")
)

# Settings from the `star_fusion` section of the config (not used directly in
# this file; presumably read by rules/star_fusion.snakefile -- confirm).
REFLIB, INDEX = (config["star_fusion"][key] for key in ("reflib", "index"))
# Optionally override module-level settings from environment variables.
# config['environmental'] maps the name of a global in this file to the name
# of the environment variable whose value should replace it. Overrides are
# applied only when config['Use_global'] is exactly True; a missing key is
# treated as "off" instead of raising KeyError.
if config.get('Use_global') is True:
    # `or {}` tolerates an `environmental:` key that is present but empty
    # (YAML parses an empty section as None).
    for env, env_var in (config.get('environmental') or {}).items():
        if env_var in os.environ:
            # Rebind the global so everything below sees the overridden value.
            globals()[env] = os.environ[env_var]
            print(env + ' is overridden by environment variable ' + env_var + ': ' + globals()[env])
# Decide which samples to process.
# If a sample table (samples.tsv, tab-separated, with an `IDs` column) exists
# in the working directory, use the IDs listed there. Otherwise fall back to
# every *.fastq.gz file found directly inside PATH_FASTQ.
if os.path.isfile('samples.tsv'):
    samples = pd.read_csv('samples.tsv', sep='\t')
    IDs = samples.IDs
    # The table, not config.yaml, is the source of the sample list.
    print("obtaining samples from samples.tsv")
else:
    print("obtaining samples from the path : " + PATH_FASTQ)
    # glob returns matches in arbitrary order; sort so the sample list (and
    # therefore the requested targets) is stable between runs.
    Temp = sorted(glob.glob(os.path.join(PATH_FASTQ, "*.fastq.gz")))
    # Two splitext calls strip the compound ".fastq.gz" extension:
    # "sample.fastq.gz" -> "sample.fastq" -> "sample".
    IDs = [os.path.splitext(os.path.splitext(os.path.basename(tmp))[0])[0] for tmp in Temp]

# NOTE(review): the original indentation was lost, so it is unclear whether
# this summary was meant only for the glob branch -- confirm.
print('found ' + str(len(IDs)) + ' fastq files to be processed')
# Aggregate target rule: requests one
# "<PATH_FUSION>/<sample>.fusion_candidate" file for every entry in IDs.
# Because it is declared before the include below, `all` is the first rule of
# the workflow, which Snakemake uses as the default target.
rule all:
    input:
        expand(PATH_FUSION+"/{sample}.fusion_candidate", sample=IDs)

# Pull in additional rules from a separate file; presumably these produce the
# .fusion_candidate targets requested above -- confirm in that file.
include: "rules/star_fusion.snakefile"