-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathProcessing_sequences_large_scale_batch.py
executable file
·162 lines (151 loc) · 8.21 KB
/
Processing_sequences_large_scale_batch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env python
#import math
import sys
import os
import commands
def Get_info(file):
    """Parse a whitespace-delimited sample sheet into parallel column lists.

    Lines whose first character is ``#``, lines of three characters or fewer
    (newline included) and lines containing only whitespace are skipped.
    Every remaining line must provide at least 10 whitespace-separated
    columns; columns 11 onwards are optional.

    Args:
        file: path of the sample sheet to read.

    Returns:
        A 14-tuple of lists, in this order (0-based column in brackets):
        id [0], sample [1], info [2], gene [3], directory [4], pair [5],
        pair_final [6], dir [7], platform [8], spec [9], primer [10],
        other (columns 11.. joined with ",", or '' when absent),
        reverse_primer_group [13] (default "STANDARD") and
        ORF_filter [14] (default "True").

    Raises:
        IndexError: if a data line has fewer than 10 columns.

    NOTE(review): ``primer`` only receives an entry when column 10 exists,
    so it can be shorter than the other lists when a row omits it. That
    behaviour is kept as-is; confirm whether a default primer file should
    be substituted instead.
    """
    ids, samples, infos, genes, directories = [], [], [], [], []
    pairs, pairs_final, dirs, platforms, species = [], [], [], [], []
    primers, others, reverse_primer_groups, orf_filters = [], [], [], []

    # `with` guarantees the handle is closed even if a malformed row raises.
    with open(file, "r") as fh:
        for line_number, raw in enumerate(fh, 1):
            # Same filter as before, applied to the raw (unstripped) line.
            if raw[0] == "#" or len(raw) <= 3:
                continue
            fields = raw.strip().split()
            if not fields:
                # Whitespace-only line: nothing to parse.
                continue
            if len(fields) < 10:
                raise IndexError(
                    "%s: line %d has %d columns, at least 10 are required"
                    % (file, line_number, len(fields)))

            ids.append(fields[0])
            samples.append(fields[1])
            infos.append(fields[2])
            genes.append(fields[3])
            directories.append(fields[4])
            pairs.append(fields[5])
            pairs_final.append(fields[6])
            dirs.append(fields[7])
            platforms.append(fields[8])
            species.append(fields[9])

            if len(fields) >= 11:
                primers.append(fields[10])
            # An empty slice joins to '', which covers the "no extra
            # columns" case without a separate branch.
            others.append(",".join(fields[11:]))
            reverse_primer_groups.append(
                fields[13] if len(fields) >= 14 else "STANDARD")
            orf_filters.append(fields[14] if len(fields) >= 15 else "True")

    return (ids, samples, infos, genes, directories, pairs, pairs_final,
            dirs, platforms, species, primers, others,
            reverse_primer_groups, orf_filters)
def Set_running(comm,wkg_dir, id):
    """Write a per-sample batch script from the template and submit it.

    The template ``template_run_command.sh`` is copied line by line (each
    line stripped of surrounding whitespace); any line that is exactly
    ``XXXCOMMANDXXX`` is replaced by ``comm``. The result is written to
    ``run_command_<id>.sh`` in the same directory and handed to ``sbatch``
    on the ``short`` partition, with job output directed to
    ``sbatch_out_<id>.txt``. The script path is printed afterwards.

    Args:
        comm: shell command line to splice into the template.
        wkg_dir: accepted for call compatibility; not used here.
        id: sample identifier used to name the script and output file.

    Returns:
        An empty tuple.
    """
    batch_dir = "/gpfs3/well/immune-rep/shared/CODE/BCR_TCR_PROCESSING_PIPELINE/sbatch-RBR/"

    with open(batch_dir + "template_run_command.sh", "r") as template:
        stripped = (line.strip() for line in template)
        body = [comm if line == "XXXCOMMANDXXX" else line for line in stripped]

    script_path = batch_dir + "run_command_" + id + ".sh"
    with open(script_path, "w") as script:
        script.write("".join(line + "\n" for line in body))

    # sbatch options must come BEFORE the script name: everything after the
    # script is passed to the script as its own arguments, so a trailing
    # "-o ..." never reaches sbatch.
    os.system("sbatch -p short -o " + batch_dir + "sbatch_out_" + id + ".txt " + script_path)
    print(script_path)
    return ()
# ---------------------------------------------------------------------------
# Command-line driver.
#
#   argv[1] sample sheet (parsed by Get_info)
#   argv[2] comma-separated list of pipeline stage codes
#   argv[3] bsub flag  (Y/N) - "Y" makes every job go through Set_running
#   argv[4] print flag (Y/N) - validated, currently not acted upon
#   argv[5] run flag   (Y/N) - validated, currently not acted upon
# ---------------------------------------------------------------------------
args = sys.argv
queue = "short.qc"
# alternative queue: "long.qc"
python = "/usr/bin/python2.7"

# args[1]..args[5] are all read below, so six entries are required; with
# fewer we show the usage text instead of failing on args[5].
if len(args) < 6:
    queue = '-q normal'
    command = 'python Processing_sequences_large_scale.py [sample file list] [commands (comma separated list)] [bsub command: Y/N] [print commands: Y/N] [run commands: Y/N]'
    print("SEQUENCE ANALYSIS PIPELINE: Creates networks from MiSeq data")
    print("USAGE:")
    print(command + " \n")
    os.system("cat Command_outline.txt")
    print("\n")
else:
    file = args[1]
    command = args[2].split(",")
    bsub_command = args[3]
    print_command = args[4]
    run_command = args[5]

    (ids, samples, infos, gene, source, pairs, pairs_final, dirs, platform,
     spec, primer, others, reverse_primer_group, ORF_filter) = Get_info(file)

    wkg_dir = "/well/immune-rep/shared/CODE/BCR_TCR_PROCESSING_PIPELINE/"

    # Each flag is checked against its own message (previously all three
    # checks tested bsub_command). Invalid values are reported only; the
    # run continues as before.
    if bsub_command not in ["Y", "N"]:
        print('python Processing_sequences.py [sample file list] [commands (comma separated list)] [bsub command: Y/N] [print commands: Y/N] [run commands: Y/N]\n\tError: bsub command must be: Y or N')
    if print_command not in ["Y", "N"]:
        print('python Processing_sequences.py [sample file list] [commands (comma separated list)] [bsub command: Y/N] [print commands: Y/N] [run commands: Y/N] \n\tError: print command must be: Y or N')
    if run_command not in ["Y", "N"]:
        print('python Processing_sequences.py [sample file list] [commands (comma separated list)] [bsub command: Y/N] [print commands: Y/N] [run commands: Y/N] \n\tError: run command must be: Y or N')

    tool = python + " " + wkg_dir   # common prefix of every command line
    bsubs = []                      # one bsub suffix per sample ('' = do not submit)
    jobs = []                       # (command line, bsub suffix, sample id) per job

    #### run per sample
    # Indexed on purpose: `primer` may be shorter than the other lists (see
    # Get_info), and zip() would silently truncate instead of failing.
    for i in range(0, len(samples)):
        sample, gene_types, pair = samples[i], gene[i], pairs[i]
        out_dir, platforms, primers = dirs[i], platform[i], primer[i]
        other, ORF_filt = others[i], ORF_filter[i]
        sample_id, sources, species = ids[i], source[i], spec[i]
        rev_group = reverse_primer_group[i]

        bsub = ''
        if bsub_command == "Y":
            bsub = " | xargs -i echo qsub -P immune-rep.prjc -q "+queue+" -b y -o "+wkg_dir+"out_STANDARD_"+sample_id+" -e "+wkg_dir+"error_log_"+sample_id+" -N job_name \"{}\" | sh"
        bsubs.append(bsub)

        # Argument fragments shared by several stages.
        read_args = " ".join([out_dir, sample_id, sample, gene_types, pair,
                              species, sources, str(200), primers, platforms])
        network_args = (out_dir + "ORIENTATED_SEQUENCES/ANNOTATIONS/ " + sample_id + " "
                        + out_dir + "ORIENTATED_SEQUENCES/NETWORKS/Fully_reduced_" + sample_id + ".fasta ")
        annotation_args = (network_args
                           + out_dir + "ORIENTATED_SEQUENCES/Filtered_ORFs_sequences_all_" + sample_id + ".fasta "
                           + gene_types + " " + species + " "
                           + out_dir + "ORIENTATED_SEQUENCES/NETWORKS/Cluster_identities_" + sample_id + ".txt")
        iso_tail = " " + out_dir + " " + species + " " + rev_group + " " + file

        per_sample = []
        if "1" in command:
            per_sample.append(tool + "Read_processing_and_quality.py " + read_args + " 1 " + other + " " + rev_group)
        if "2" in command:
            per_sample.append(tool + "Read_processing_and_quality.py " + read_args + " 2 " + other + " " + rev_group + " " + ORF_filt)
        if "2UJ" in command:
            per_sample.append(tool + "Read_processing_and_quality_unjoined.py " + read_args + " 2 " + other + " " + rev_group)
        if "3" in command:
            per_sample.append(tool + "Read_processing_and_quality.py " + read_args + " 3 " + other + " " + rev_group)
        if "4" in command:
            per_sample.append(tool + "Generate_repertoire_statistics.py " + annotation_args + " ANNOTATE,STATISTICS " + rev_group)
        if "ISO1" in command:
            per_sample.append(tool + "IsoTyper_2.0.py " + sample_id + " " + sample_id + iso_tail)
        if "ISO1_PRODUCTIVE" in command:
            per_sample.append(tool + "IsoTyper_2.0.py " + sample_id + "_productive " + sample_id + "_productive" + iso_tail)
        if "ISO1_NON_PRODUCTIVE" in command:
            per_sample.append(tool + "IsoTyper_2.0.py " + sample_id + "_unproductive " + sample_id + "_unproductive" + iso_tail)
        if "ISO2" in command:
            per_sample.append(tool + "Per_isotype_cluster_analyses.py " + sample_id + " " + sample_id + iso_tail)
        if "NONISO1" in command:
            per_sample.append(tool + "Non_isotyper_1.0.py " + sample_id + " " + sample_id + " " + out_dir + " " + species + " ")
        if "TCRISO1" in command:
            per_sample.append(tool + "TCRoTyper_1.0.py " + sample_id + " " + sample_id + " " + out_dir + " " + species + " " + rev_group)
        if "CSR" in command:
            per_sample.append(tool + "Class_switch_recombination_analysis.py " + annotation_args + " 1")
        if "SUBSAMPLE" in command:
            per_sample.append(tool + "Subsampling_networks.py " + network_args + out_dir + "ORIENTATED_SEQUENCES/NETWORKS/Edges_" + sample_id + ".txt")
        if "CONSENSUS" in command:
            per_sample.append(tool + "Get_consensus_counts.py " + out_dir + " " + sample_id + " ")

        # Keep every command tied to the sample it was built for, so the run
        # loop cannot pair a command with another sample's id.
        for line in per_sample:
            jobs.append((line, bsub, sample_id))

    ##### only run once per batch here:
    # NOTE(review): assumed to sit outside the per-sample loop, as the
    # comment above says. Batch jobs are labelled with the first sample's
    # id/bsub flag (what index 0 resolved to before) and stage "6" uses the
    # directory of the last sample. With no samples there is nothing to
    # label a batch job with, so these are skipped.
    if ids:
        if "5" in command:
            jobs.append((tool + "Get_batch_information.py " + file, bsubs[0], ids[0]))
        if "6" in command:
            jobs.append((tool + "Combine_extract_IMGT_information.py " + file + " " + out_dir, bsubs[0], ids[0]))

    ##### run commands
    # NOTE(review): submission is assumed to be gated on the bsub flag, i.e.
    # Set_running only runs when bsub_command == "Y"; confirm nesting.
    # Set_running names its script and output file after the id alone, so
    # several jobs for one sample reuse the same file names.
    for i, (comm, bsub, sample_id) in enumerate(jobs):
        if bsub != '':
            print("set up")
            Set_running(comm, wkg_dir, sample_id)
        print("%s %s" % (i, sample_id))