From d785457cd3dc3252bb2188355f1c434d8718539f Mon Sep 17 00:00:00 2001 From: Woosub-Kim Date: Sun, 25 Feb 2024 18:28:19 +0900 Subject: [PATCH] change names for tm based rbh filtering --- src/strucclustutils/createcomplexreport.h | 6 ++-- src/strucclustutils/scorecomplex.cpp | 36 ++++++++++------------- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/strucclustutils/createcomplexreport.h b/src/strucclustutils/createcomplexreport.h index def1da59..fec3d7da 100644 --- a/src/strucclustutils/createcomplexreport.h +++ b/src/strucclustutils/createcomplexreport.h @@ -8,14 +8,12 @@ const double MAX_ASSIGNED_CHAIN_RATIO = 1.0; const double TOO_SMALL_MEAN = 1.0; const double TOO_SMALL_CV = 0.1; const double FILTERED_OUT = 0.0; -//const bool UNCLUSTERED = false; -//const bool CLUSTERED = true; const unsigned int INITIALIZED_LABEL = 0; const unsigned int MIN_PTS = 2; const float DEFAULT_EPS = 0.1; const float LEARNING_RATE = 0.1; -const float BIT_SCORE_MARGIN = 0.7; -const float DEF_BIT_SCORE = -1.0; +const float TM_SCORE_MARGIN = 0.5; +const float DEF_TM_SCORE = -1.0; const int UNINITIALIZED = 0; const unsigned int MULTIPLE_CHAINED_COMPLEX = 2; typedef std::pair compNameChainName_t; diff --git a/src/strucclustutils/scorecomplex.cpp b/src/strucclustutils/scorecomplex.cpp index 9b595a4f..4e586e55 100644 --- a/src/strucclustutils/scorecomplex.cpp +++ b/src/strucclustutils/scorecomplex.cpp @@ -27,8 +27,7 @@ struct Chain { struct ChainToChainAln { ChainToChainAln() {} -// ChainToChainAln(Chain &queryChain, Chain &targetChain, float *qCaData, float *dbCaData, Matcher::result_t &alnResult, TMaligner::TMscoreResult &tmResult) : qChain(queryChain), dbChain(targetChain), bitScore((float)alnResult.score) { - ChainToChainAln(Chain &queryChain, Chain &targetChain, float *qCaData, float *dbCaData, Matcher::result_t &alnResult, TMaligner::TMscoreResult &tmResult) : qChain(queryChain), dbChain(targetChain), bitScore((float)tmResult.tmscore) { + ChainToChainAln(Chain &queryChain, Chain &targetChain, float *qCaData, float *dbCaData, Matcher::result_t &alnResult, TMaligner::TMscoreResult &tmResult) : qChain(queryChain), dbChain(targetChain), tmScore((float)tmResult.tmscore) { alnLength = alnResult.alnLength; matches = 0; unsigned int qPos = alnResult.qStartPos; @@ -83,7 +82,7 @@ struct ChainToChainAln { resultToWrite_t resultToWrite; double superposition[12]; unsigned int label; - float bitScore; + float tmScore; float getDistance(const ChainToChainAln &o) { float dist = 0; @@ -337,8 +336,8 @@ class DBSCANCluster { distMap_t distMap; std::vector currClusters; std::set &finalClusters; - std::map qBestBitScore; - std::map dbBestBitScore; + std::map qBestTmScore; + std::map dbBestTmScore; bool runDBSCAN() { initializeAlnLabels(); @@ -489,39 +488,36 @@ class DBSCANCluster { void filterAlnsByRBH() { unsigned int alnIdx = 0; - float bitScore; + float tmScore; unsigned int qKey; unsigned int dbKey; - qBestBitScore.clear(); - dbBestBitScore.clear(); + qBestTmScore.clear(); + dbBestTmScore.clear(); for (auto qChainKey: searchResult.qChainKeys) { - qBestBitScore.insert({qChainKey, DEF_BIT_SCORE}); + qBestTmScore.insert({qChainKey, DEF_TM_SCORE}); } - for (auto dbChainKey: searchResult.dbChainKeys) { - dbBestBitScore.insert({dbChainKey, DEF_BIT_SCORE}); + dbBestTmScore.insert({dbChainKey, DEF_TM_SCORE}); } - for (auto &aln: searchResult.alnVec) { qKey = aln.qChain.chainKey; dbKey = aln.dbChain.chainKey; - bitScore = aln.bitScore; - qBestBitScore[qKey] = qBestBitScore[qKey]= std::max(qBestBitScore[qKey], dbBestBitScore[dbKey]) * BIT_SCORE_MARGIN) { + tmScore = searchResult.alnVec[alnIdx].tmScore; + if (tmScore >= std::max(qBestTmScore[qKey], dbBestTmScore[dbKey]) * TM_SCORE_MARGIN) { alnIdx ++; continue; } searchResult.alnVec.erase(searchResult.alnVec.begin() + alnIdx); } - qBestBitScore.clear(); - dbBestBitScore.clear(); + qBestTmScore.clear(); + dbBestTmScore.clear(); // return; }