-
Notifications
You must be signed in to change notification settings - Fork 0
/
assemble
executable file
·144 lines (126 loc) · 4.71 KB
/
assemble
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#! /usr/bin/env ruby
require 'rubygems'
require 'trollop'
require 'bio'
# TODO: migrate to Assembler object
# TODO: move all filesystem calls to ruby methods
# options
$opts = Trollop::options do
version "v0.0.1a"
banner <<-EOS
--------------------- assemble --------------------
run assembler (by default, velvet-oases)
default options are included, but can be overridden
---------------------------------------------------
EOS
opt :assembler, "assembler to use (choice of: velvetoases, soapdt, trinity)", :required => true, :type => String
opt :left, "left reads file in fastq/fasta format", :required => true, :type => String
opt :right, "right reads file in fastq/fasta format", :required => true, :type => String
opt :readformat, "format of reads file (fasta/fastq)", :default => 'fastq', :type => String
opt :insertsize, "mean insert size", :default => 200, :type => Integer
opt :insertsd, "insert size standard deviation", :default => 50, :type => Integer
opt :ks, "list of kmer sizes to use, e.g 31,41,51,61", :required => true, :type => String
opt :threads, "max number of threads to use (default 8)", :default => 8, :type => Integer
end
KSIZES = $opts.ks.split(',')
# catch exit statuses (cluster sniping detector!)
trap("SIGTERM") {
puts "process killed!"
}
trap("SIGINT") {
puts "process interrupted!"
}
trap("SIGQUIT") {
puts "process quitting!"
}
# check options are valid
# assembler
validassemblers = ['velvetoases', 'soapdt', 'trinity']
if validassemblers.include? $opts.assembler
$opts.assembler
else
abort("Assembler option '#{$opts.assembler}' invalid, please choose from [#{validassemblers.join(', ')}]")
end
# read format
validreadformats = ['fasta', 'fastq']
unless validreadformats.include? $opts.readformat
abort("Read format option '#{$opts.readformat}' invalid, please choose from [#{validreadformats.join(', ')}]")
end
# setup specific to assemblers
# set thread number for velvet
if $opts.assembler == 'velvetoases'
threadsetter = 'setmpthreads.sh'
File.open(threadsetter, 'w') do |f|
f << "export OMP_NUM_THREADS=#{$opts.threads}"
end
`sh #{threadsetter}`
File.delete(threadsetter)
puts "OpenMP number of threads set to #{$opts.threads} globally"
end
# assembler functions - should accept a k-mer size, left and right read files
# and a boolean flag for the first assembly if used in kdescent
def soapdt(k, l, r, first)
# make config file
rf = $opts.readformat == 'fastq' ? 'q' : 'f'
File.open("soapdt.config", "w") do |conf|
conf.puts "max_rd_len=20000"
conf.puts "[LIB]"
conf.puts "avg_ins=#{$opts.insertsize}"
conf.puts "reverse_seq=0"
conf.puts "asm_flags=3"
conf.puts "rank=2"
conf.puts "#{rf}1=#{l}"
conf.puts "#{rf}2=#{r}"
if !first
conf.puts "[LIB]"
conf.puts "asm_flags=2"
conf.puts "rank=1" # prioritise the higher-k contigs in scaffolding
conf.puts "longreads.fa"
end
end
# construct command
cmd = "/applications/soapdenovo-trans/SOAPdenovo-Trans-127mer all"
cmd += " -s soapdt.config" # config file
cmd += " -a 30" # memory assumption
cmd += " -o k#{k}" # output directory
cmd += " -K #{k}" # kmer size
cmd += " -p #{$opts.threads}" # number of threads
cmd += " -d 3" # minimum kmer frequency
cmd += " -F" # fill gaps in scaffold
cmd += " -M 1" # strength of contig flattening
cmd += " -D 1" # delete edges with coverage no greater than
cmd += " -L 200" # minimum contig length
cmd += " -u" # unmask high coverage contigs before scaffolding
cmd += " -e 2" # delete contigs with coverage no greater than
cmd += " -t 5" # maximum number of transcripts from one locus
# cmd += " -S" # scaffold structure exists
# run command
`#{cmd} > k#{k}.log`
# cleanup unneeded files
`mkdir k#{k}`
`mv k#{k}.scafSeq k#{k}/transcripts.fa`
`rm k#{k}.*`
end
def velvetoases(k, l, r, first)
longstr = first ? "" : "-fasta -long longreads.fa "
`~/apps/velveth k#{k} #{k} -#{$opts.readformat} -shortPaired #{l} #{r} #{longstr} &> k#{k}.log`
`~/apps/velvetg k#{k} -read_trkg yes -cov_cutoff 3 -scaffolding yes -min_contig_lgth 200 -ins_length #{$opts.insertsize} -ins_length_sd #{$opts.insertsd} -min_pair_count 6 &>> k#{k}.log`
`~/apps/oases k#{k} -ins_length #{$opts.insertsize} -ins_length_sd #{$opts.insertsd} -scaffolding yes -cov_cutoff 3 -min_trans_lgth 200 -edgeFractionCutoff 0.1 -min_pair_count 6 &>> k#{k}.log`
# cleanup unneeded files
Dir.chdir("k#{k}") do
['Sequences', 'Roadmaps', 'PreGraph', 'Graph2', 'LastGraph'].each do |f|
File.delete(f)
end
end
end
def assemble(k, l, r, first)
case $opts.assembler
when 'velvetoases'
velvetoases(k, l, r, first)
when 'soapdt'
soapdt(k, l, r, first)
end
end
KSIZES.each do |k|
assemble(k, $opts.left, $opts.right, true)
end