Skip to content

Commit

Permalink
sdm 2.18: added AVITI fastq support
Browse files Browse the repository at this point in the history
  • Loading branch information
hifa committed Aug 2, 2024
1 parent de8b053 commit 59d4082
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 25 deletions.
3 changes: 2 additions & 1 deletion DNAconsts.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
//2.14: new model of preallocating space for input strings
//2.16: new threading model (no more dynamic threadpool) (reversed to old model)
//2.17: fixed PB rev seq checks..
//2.18: fastq autoversion updated (more robust)

static const float sdm_version = 2.17f;
static const float sdm_version = 2.18f;
static const char* sdm_status = "beta";


Expand Down
47 changes: 30 additions & 17 deletions InputStream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1969,10 +1969,10 @@ shared_ptr<DNA> InputStreamer::read_fastq_entry(istream & fna, qual_score &minQ
if ((lnCnt ) % 4 != 0) {
fqPassedFQsdt = false;
}

bool printB(false);
for (size_t i = 0; i < line.length(); i++) {
//really 33?
Iqual[qcnt] = minmaxQscore((qual_score)line[i] - fastQver);
Iqual[qcnt] = minmaxQscore((qual_score)line[i] - fastQver, printB);
qcnt++;
}
if (qcnt == tdn->length()) {
Expand Down Expand Up @@ -2068,27 +2068,34 @@ shared_ptr<DNA> InputStreamer::read_fastq_entry_fast(istream & fna, int& lnCnt,
corrupt = false;
return tdn;
}
void InputStreamer::minmaxQscore(shared_ptr<DNA> t) {
void InputStreamer::minmaxQscore(shared_ptr<DNA> t, bool& print) {
fqverMTX.lock();
vector<qual_score> Qs = t->getQual();
for (size_t i = 0; i < Qs.size(); i++) {
minmaxQscore(Qs[i]);
minmaxQscore(Qs[i], print);
}
fqverMTX.unlock();
}

qual_score InputStreamer::minmaxQscore(qual_score t) {
qual_score InputStreamer::minmaxQscore(qual_score t,bool& print) {
if (t < 0) { ////quick hack, since q<13 scores are the only deviants and uninteresting in most cases..
if (fqSolexaFmt){
if (t < -5) {
if (fqSolexaFmt ){
if (t < -5 && print) {
cerr << "Unusually low sloexa quality score (" << t << "); setting to 0.\n";
print = false;
}
} else {
if (t >= -5) {
cerr << "Resetting auto format to Solexa (illumina 1.0-1.3) format.\n";
fqSolexaFmt = true;
if (print) {
cerr << "Resetting auto format to Solexa (illumina 1.0-1.3) format.\n";
print = false;
}
//fqSolexaFmt = true;
} else {
cerr << "Unusually low quality score (" << t << "); setting to 0.\n";
if (print) {
cerr << "Unusually low quality score (" << t << "); setting to 0.\n";
print = false;
}
}
}
t = 0;
Expand Down Expand Up @@ -2565,9 +2572,10 @@ bool InputStreamer::getDNAlines(vector<string>& ret, int pos) {
}*/
lnCnt[pos] += 4;
if (fastQver == 0 || lnCnt[pos] == 4) { // ok first time just has to be done in function, after this can be unloaded outside
bool printB(true);
//cerr << "AutoFQ!!";
shared_ptr<DNA> tdn = make_shared<DNA>(ret, fastQver);
minmaxQscore(tdn);
minmaxQscore(tdn,printB);
auto_fq_version();
//tdn->resetQualOffset(auto_fq_version(), fqSolexaFmt);
}
Expand Down Expand Up @@ -2983,26 +2991,31 @@ void InputStreamer::setupFna(string fileS){
int InputStreamer::auto_fq_version() {
//if (QverSet) { return 33; }
int fqDiff(0);
if (minQScore >= 100 || maxQScore < 2) {
if (minQScore >= 100 || maxQScore < 2 || fastQver!=0) {
//don;t know what to do here..
return fqDiff;
}
fqverMTX.lock();
fqSolexaFmt = false;
if (minQScore >= 59 && maxQScore > 74){

if (minQScore >= 33 && maxQScore <= 88 ){
fqDiff = (fastQver - 33); fastQver = 33;

} else if (minQScore >= 59 && maxQScore > 74) {
fqDiff = (fastQver - 64); fastQver = 64;
if (minQScore < 64) { //set to illumina1.0 (solexa)
fqSolexaFmt = true;
//cerr << "Setting to illumina 1.0-1.3 (solexa) fastq version (q offset = 64, min Q=-5).";
} else {
}
else {
//cerr << "Setting to illumina 1.3-1.8 fastq version (q offset = 64).";
}
} else if (minQScore >= 33 && maxQScore <= 74) {
fqDiff = (fastQver - 33); fastQver = 33;
//cerr << "Setting to Sanger fastq version (q offset = 33).";
} else {
if (verbose) {
cerr << "\nUndecided fastq version..\n"; verbose = 0;
cerr << "\nUndecided fastq version.. reverting to +33\n"; verbose = 0;
}

//fallback to sensible std..
fqDiff = (fastQver - 33); fastQver = 33;
//exit(53);
Expand Down
4 changes: 2 additions & 2 deletions InputStream.h
Original file line number Diff line number Diff line change
Expand Up @@ -1227,8 +1227,8 @@ class InputStreamer{

private:
string current_infiles();
inline qual_score minmaxQscore(qual_score t);// , int lnCnt);
void minmaxQscore(shared_ptr<DNA> t);// , int lnCnt);
inline qual_score minmaxQscore(qual_score t, bool& print);// , int lnCnt);
void minmaxQscore(shared_ptr<DNA> t, bool&);// , int lnCnt);
bool setupFastq_2(string, string, string);
bool setupFastaQual2(string, string, string = "fasta file");
shared_ptr<DNA> read_fastq_entry(istream & fna, qual_score &minQScore,
Expand Down
25 changes: 20 additions & 5 deletions containers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6876,6 +6876,13 @@ bool UClinks::getMAPPERline(string& segs, string& segs2,float& perID,
else if (tmp == "noisy_chimera" || tmp == "good_chimera") { //tmp == "perfect_chimera" ||
if (!doChimeraCnt) { continue; }
chimera = true;
//M04428:252:000000000-BM3M5:1:1109:4503:15901;size=52; noisy_chimera
// dqt=20;dqm=2;div=9.0;segs=2;parents=M04428:252:000000000-BM3M5:1:2110:24763:14436(1-78/0)+M04428:252:000000000-BM3M5:1:1116:5359:16203(78-186/0);

getline(ss, tmp, '\t');
size_t p1(tmp.find("parents=") + 4);//4
size_t p2(tmp.find(");", p1) + 2);
segs2 = tmp.substr(p1, p2 - p1 - 1);
}
else if (tmp == "perfect_chimera") {
removeSizeStr(segs);
Expand Down Expand Up @@ -6915,12 +6922,20 @@ bool UClinks::getMAPPERline(string& segs, string& segs2,float& perID,

//finished parsing, now reformat to get matching sample

if ( chimera && UPARSE8up) {
tarsV = splitByComma(segs2, false, '+');
for ( uint kk = 0; kk < tarsV.size(); kk++ ) {
tarsV[kk] = tarsV[kk].substr(0,tarsV[kk].find_last_of("("));
if (chimera){
if (UPARSE9up) {
tarsV = splitByComma(segs2, false, '+');
for (uint kk = 0; kk < tarsV.size(); kk++) {
tarsV[kk] = tarsV[kk].substr(0, tarsV[kk].find_last_of("("));
}
}
} else if (!chimera){
else if (UPARSE8up) {
tarsV = splitByComma(segs2, false, '+');
for (uint kk = 0; kk < tarsV.size(); kk++) {
tarsV[kk] = tarsV[kk].substr(0, tarsV[kk].find_last_of("("));
}
}
} else {
tarsV.push_back(segs2);
}
matrixUnit splChim = (matrixUnit)tarsV.size();
Expand Down

0 comments on commit 59d4082

Please sign in to comment.