Skip to content

Commit

Permalink
updated to v0.7.8
Browse files Browse the repository at this point in the history
  • Loading branch information
bbuchfink committed Mar 30, 2015
1 parent 7560ef8 commit 8380623
Show file tree
Hide file tree
Showing 12 changed files with 79 additions and 22 deletions.
Binary file modified bin/diamond
Binary file not shown.
6 changes: 6 additions & 0 deletions src/ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
[0.7.8]
- fixed a bug that could produce an incorrect sort order of HSPs

[0.7.7]
- fixed a number formatting error in the SAM output format

[0.7.6]
- fixed a formatting error in CIGAR strings

Expand Down
2 changes: 1 addition & 1 deletion src/align/align_queries.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,9 @@ void align_queries(const Trace_pt_buffer<_locr,_locl> &trace_pts, Output_stream*
log_stream << "Processing query bin " << bin+1 << '/' << trace_pts.bins() << '\n';
task_timer timer ("Loading trace points", false);
trace_pts.load(v, bin);
v.init();
timer.go("Sorting trace points");
merge_sort(v.begin(), v.end(), program_options::threads());
v.init();
timer.go("Computing alignments");
if(ref_header.n_blocks > 1) {
Align_context<_val,_locr,_locl,Temp_output_buffer<_val> > context (v, output_file);
Expand Down
4 changes: 2 additions & 2 deletions src/basic/const.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ struct Const
{

enum {
build_version = 56,
build_version = 57,
build_compatibility = 52,
daa_version = 0,
seedp_bits = 10,
Expand All @@ -46,7 +46,7 @@ struct Const

};

const char* Const::version_string = "0.7.7";
const char* Const::version_string = "0.7.8";
const char* Const::program_name = "diamond";
const char* Const::id_delimiters = " \a\b\f\n\r\t\v";

Expand Down
1 change: 1 addition & 0 deletions src/basic/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ double toppercent;
string daa_file;
string output_format;
bool forwardonly;
unsigned fetch_size;

Aligner_mode aligner_mode;
Command command;
Expand Down
1 change: 1 addition & 0 deletions src/basic/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ namespace program_options
extern string output_format;
extern string output_file;
extern bool forwardonly;
extern unsigned fetch_size;

typedef enum { fast=0, sensitive=1, very_sensitive=2 } Aligner_mode;
extern Aligner_mode aligner_mode;
Expand Down
14 changes: 5 additions & 9 deletions src/basic/setup.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,25 +129,21 @@ void setup_search_params(pair<size_t,size_t> query_len_bounds, size_t chunk_db_l

const double b = po::min_bit_score == 0 ? score_matrix::get().bitscore(po::max_evalue, ref_header.letters, query_len_bounds.first) : po::min_bit_score;

if(query_len_bounds.second <= 40) {
po::set_option(po::min_identities, 10u);
po::set_option(po::min_ungapped_raw_score, score_matrix::get().rawscore(std::min(27.0, b)));
} else {
po::set_option(po::min_identities, 9u);
po::set_option(po::min_ungapped_raw_score, score_matrix::get().rawscore(std::min(23.0, b)));
}
po::set_option(po::min_identities, 18u);
po::min_ungapped_raw_score = score_matrix::get().rawscore(std::min(po::min_ungapped_raw_score == 0 ? 19.0 : po::min_ungapped_raw_score, b));

if(query_len_bounds.second <= 80) {
if(query_len_bounds.second <= 128) {
const int band = po::read_padding<_val>(query_len_bounds.second);
po::set_option(po::window, (unsigned)(query_len_bounds.second + band));
po::set_option(po::hit_band, band);
po::set_option(po::min_hit_score, score_matrix::get().rawscore(b));
} else {
po::set_option(po::window, 40u);
po::set_option(po::hit_band, 5);
po::set_option(po::min_hit_score, score_matrix::get().rawscore(std::min(29.0, b)));
po::min_hit_score = score_matrix::get().rawscore(std::min(po::min_hit_score == 0 ? 19.0 : po::min_hit_score, b));
}
log_stream << "Query len bounds " << query_len_bounds.first << ' ' << query_len_bounds.second << endl;
log_stream << "Minimum bit score = " << b << endl;
log_stream << "Search parameters " << po::min_ungapped_raw_score << ' ' << po::min_hit_score << ' ' << po::hit_cap << endl;
}

Expand Down
1 change: 1 addition & 0 deletions src/basic/shape.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ struct shape
unsigned r = Reduction<_val>::reduction(l);
f += background_freq[r];
s *= Reduction<_val>::reduction.size();
//s *= 5;
s += uint64_t(r);
}
if(use_seed_freq<_val>() && f > program_options::max_seed_freq) return false;
Expand Down
12 changes: 9 additions & 3 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,14 @@ int main(int ac, const char* av[])
("window,w", po::value<unsigned>(&program_options::window)->default_value(0), "window size for local hit search")
("xdrop", po::value<int>(&program_options::xdrop)->default_value(20), "xdrop for ungapped alignment")
("gapped-xdrop,X", po::value<int>(&program_options::gapped_xdrop)->default_value(20), "xdrop for gapped alignment in bits")
("ungapped-score", po::value<int>(&program_options::min_ungapped_raw_score)->default_value(0), "minimum ungapped raw alignment score to continue local extension")
("ungapped-score", po::value<int>(&program_options::min_ungapped_raw_score)->default_value(0), "minimum raw alignment score to continue local extension")
("hit-band", po::value<int>(&program_options::hit_band)->default_value(0), "band for hit verification")
("hit-score", po::value<int>(&program_options::min_hit_score)->default_value(0), "minimum score to keep a tentative alignment")
("band", po::value<int>(&program_options::padding)->default_value(0), "band for dynamic programming computation")
("shapes,s", po::value<unsigned>(&program_options::shapes)->default_value(0), "number of seed shapes (0 = all available)")
("index-mode", po::value<unsigned>(&program_options::index_mode)->default_value(0), "index mode (1=4x12, 2=16x9)");
("index-mode", po::value<unsigned>(&program_options::index_mode)->default_value(0), "index mode (1=4x12, 2=16x9)")
("fetch-size", po::value<unsigned>(&program_options::fetch_size)->default_value(4096), "trace point fetch size")
;
//("no-traceback,r", "disable alignment traceback");
//("compress-temp", po::value<unsigned>(&program_options::compress_temp)->default_value(0), "compression for temporary output files (0=none, 1=gzip)");

Expand Down Expand Up @@ -168,10 +170,14 @@ int main(int ac, const char* av[])
} else if (program_options::command == program_options::makedb && vm.count("in") && vm.count("db")) {
if(vm.count("block-size") == 0)
program_options::chunk_size = 2;
#ifdef EXTRA
if(program_options::db_type == "nucl")
make_db(Nucleotide());
else
else if(program_options::db_type == "prot")
make_db(Amino_acid());
#else
make_db(Amino_acid());
#endif
} else if ((program_options::command == program_options::blastp
|| program_options::command == program_options::blastx
#ifdef EXTRA
Expand Down
6 changes: 3 additions & 3 deletions src/output/join_blocks.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ void join_blocks(unsigned ref_blocks, DAA_output &master_out, const vector<Temp_
Block_output::Iterator r;
for(unsigned i=0;i<ref_blocks;++i) {
files.push_back(new Block_output (i, tmp_file[i]));
if(files.back()->next(r, std::numeric_limits<unsigned>::max()))
if(files.back()->next(r, std::numeric_limits<unsigned>::max(), std::numeric_limits<unsigned>::max()))
records.push_back(r);
}
std::make_heap(records.begin(), records.end());
Expand All @@ -63,7 +63,7 @@ void join_blocks(unsigned ref_blocks, DAA_output &master_out, const vector<Temp_
}
const bool same_subject = n_target_seq > 0 && b == block && next.info_.subject_id == subject;
if(program_options::output_range(n_target_seq, next.info_.score, top_score) || same_subject) {
//printf("q=%u s=%u n=%u ss=%u\n",query, next.info_.subject_id, n_target_seq, same_subject);
//printf("q=%u s=%u n=%u ss=%u\n",query, next.info_.subject_id, n_target_seq, same_subject, next.info_.score);
DAA_output::write_record(buf, next.info_);
statistics.inc(Statistics::MATCHES);
if(!same_subject) {
Expand All @@ -76,7 +76,7 @@ void join_blocks(unsigned ref_blocks, DAA_output &master_out, const vector<Temp_

std::pop_heap(records.begin(), records.end());
records.pop_back();
if(files[b]->next(r, subject)) {
if(files[b]->next(r, subject, query)) {
records.push_back(r);
std::push_heap(records.begin(), records.end());
}
Expand Down
8 changes: 5 additions & 3 deletions src/output/output_file.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,18 @@ struct Block_output : public Buffered_file
bool same_subject_;
Intermediate_record info_;
bool operator<(const Iterator &rhs) const
{ return info_.query_id > rhs.info_.query_id || (info_.query_id == rhs.info_.query_id && (!same_subject_ || info_.score < rhs.info_.score)); }
{ return info_.query_id > rhs.info_.query_id ||
(info_.query_id == rhs.info_.query_id && (rhs.same_subject_ ||
(!rhs.same_subject_ && info_.score < rhs.info_.score))); }
};

bool next(Iterator &it, unsigned subject)
bool next(Iterator &it, unsigned subject, unsigned query)
{
if(this->eof())
return false;
it.info_.read(*this);
it.block_ = block_;
it.same_subject_ = it.info_.subject_id == subject;
it.same_subject_ = it.info_.subject_id == subject && it.info_.query_id == query;
return true;
}

Expand Down
46 changes: 45 additions & 1 deletion src/search/trace_pt_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,34 @@ struct Trace_pt_list : public vector<hit<_locr,_locl> >
/**
 * Resets the traversal state of this list.
 *
 * Places the read cursor on the first hit and resets the partition
 * bookkeeping (p_ holds the single offset 0, idx_ is 0). In the code visible
 * here, p_ and idx_ are only consumed by the disabled partitioning
 * experiments kept below for reference.
 */
void init()
{
    idx_ = 0;
    p_.assign(1u, size_t(0));
    pos_ = this->begin();
    // Only the disabled code below reads c; the call itself is kept unchanged.
    const unsigned c = query_contexts();
    /* Disabled experiment 1: precompute boundaries every fetch_size hits,
       extended up to the next change of query_/c.
    typename vector<hit<_locr,_locl> >::iterator i = this->begin()+program_options::fetch_size;
    for(; i < this->end(); i=std::min(i+program_options::fetch_size, this->end())) {
        const unsigned q = i->query_/c;
        for(; i<this->end() && i->query_/c == q; ++i);
        //printf("%lu %u %u\n", i-this->begin(), i->query_/c, q);
        //printf("%lu\n",i - this->begin());
        //std::terminate();
        p_.push_back(i - this->begin());
    }*/
    /* Disabled experiment 2: adaptive boundaries, each chunk at least
       4/5 of the running average chunk size (plus one).
    typename vector<hit<_locr,_locl> >::iterator i = this->begin();
    unsigned total=0,count=1;
    for(; i < this->end();) {
        unsigned n=0;
        const unsigned min_size = 4*total/count/5 + 1;
        for(;i<this->end() && n<min_size;) {
            const unsigned q = i->query_/c;
            for(; i<this->end() && i->query_/c == q; ++i)
                ++n;
        }
        ++count;
        total += n;
        p_.push_back(i - this->begin());
    }
    p_.push_back(i - this->begin());*/
}
struct Query_range
{
Expand All @@ -52,15 +80,28 @@ struct Trace_pt_list : public vector<hit<_locr,_locl> >
{ }
/**
 * Advances this range to the next batch of trace points.
 *
 * The batch starts at the parent's current position and initially spans
 * program_options::fetch_size hits. It is then extended until the value of
 * query_ / query_contexts() changes, so hits sharing that value are never
 * split across two batches.
 *
 * @return true if further hits remain after [begin, end); false if this is
 *         the last batch. When no more than fetch_size hits remain, the
 *         batch is simply the tail of the list and the parent's position is
 *         left untouched.
 */
bool operator()()
{
    const typename Trace_pt_list::iterator last = parent_.end();
    begin = parent_.pos_;
    // Compare distances instead of forming begin + fetch_size up front:
    // moving an iterator beyond end() is undefined behaviour.
    if(static_cast<size_t>(last - begin) <= program_options::fetch_size) {
        end = last;
        return false;
    }
    end = begin + program_options::fetch_size;
    // Grow the batch until the query index (query_ / contexts) changes.
    const unsigned c = query_contexts(), q = end->query_/c;
    while(end < last && end->query_/c == q)
        ++end;
    parent_.pos_ = end;
    return end < last;
}
/*bool operator()()
{
begin = parent_.begin()+parent_.p_[parent_.idx_];
end = parent_.begin()+parent_.p_[parent_.idx_+1];
printf("%lu %lu %lu\n", parent_.p_[parent_.idx_], parent_.p_[parent_.idx_+1], parent_.p_[parent_.idx_+1]-parent_.p_[parent_.idx_]);
++parent_.idx_;
return parent_.idx_ < parent_.p_.size()-1;
}*/
typename Trace_pt_list::iterator begin, end;
private:
Trace_pt_list &parent_;
Expand All @@ -69,6 +110,9 @@ struct Trace_pt_list : public vector<hit<_locr,_locl> >
{ return Query_range (*this); }
private:
typename vector<hit<_locr,_locl> >::iterator pos_;
vector<size_t> p_;
unsigned idx_;
};

#endif /* TRACE_PT_BUFFER_H_ */

0 comments on commit 8380623

Please sign in to comment.