Skip to content

Commit

Permalink
fix merge conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
Shostina committed Jan 26, 2024
2 parents 472a428 + a57ef61 commit 93bba6a
Show file tree
Hide file tree
Showing 1,121 changed files with 93,388 additions and 76,867 deletions.
64 changes: 47 additions & 17 deletions command_line/querypaths.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <CLI/CLI.hpp>

#include <QDateTime>
#include <type_traits>

CLI::App *addQueryPathsSubcommand(CLI::App &app,
QueryPathsCmd &cmd) {
Expand All @@ -38,10 +39,10 @@ CLI::App *addQueryPathsSubcommand(CLI::App &app,
->required();
qp->add_flag("--pathfasta", cmd.m_pathFasta, "Put all query path sequences in a multi-FASTA file, not in the TSV file");
qp->add_flag("--hitsfasta", cmd.m_hitsFasta, "Produce a multi-FASTA file of all BLAST hits in the query paths");
qp->add_flag("--gfapaths", cmd.m_gfaPaths, "Align to GFA path sequences in addition to nodes");

qp->footer("Bandage querypaths searches for queries in the graph using BLAST and outputs the results to a tab-delimited file.");


return qp;

}
Expand Down Expand Up @@ -85,7 +86,11 @@ int handleQueryPathsCmd(QApplication *app,

QDateTime startTime = QDateTime::currentDateTime();

out << Qt::endl << "(" << QDateTime::currentDateTime().toString("dd MMM yyyy hh:mm:ss") << ") Loading graph... " << Qt::flush;
auto log = [&out](const char * msg) {
out << "(" << QDateTime::currentDateTime().toString("dd MMM yyyy hh:mm:ss") << ") " << msg << Qt::flush;
};

log("Loading graph... ");

if (!g_assemblyGraph->first()->loadGraphFromFile(cmd.m_graph.c_str())) {
err << "Bandage-NG error: could not load " << cmd.m_graph.c_str() << Qt::endl;
Expand All @@ -96,18 +101,19 @@ int handleQueryPathsCmd(QApplication *app,
err << g_blastSearch->lastError() << Qt::endl;
return 1;
}
out << "done" << Qt::endl;

out << "(" << QDateTime::currentDateTime().toString("dd MMM yyyy hh:mm:ss") << ") Running BLAST search... " << Qt::flush;
log("Running BLAST search... ");
QString blastError = g_blastSearch->doAutoGraphSearch(g_assemblyGraph,
g_settings->blastQueryFilename,
false, /* include paths */
cmd.m_gfaPaths,
g_settings->blastSearchParameters);
if (!blastError.isEmpty()) {
err << Qt::endl << blastError << Qt::endl;
return 1;
}
out << "done" << Qt::endl;
out << "(" << QDateTime::currentDateTime().toString("dd MMM yyyy hh:mm:ss") << ") Saving results... " << Qt::flush;
log("Saving results... ");

// Create the table file.
tableFile.open(QIODevice::WriteOnly | QIODevice::Text);
Expand All @@ -117,6 +123,8 @@ int handleQueryPathsCmd(QApplication *app,
tableOut << "Query\t"
"Path\t"
"Length\t"
"Query start\t"
"Query end\t"
"Query covered by path\t"
"Query covered by hits\t"
"Mean hit identity\t"
Expand Down Expand Up @@ -144,22 +152,43 @@ int handleQueryPathsCmd(QApplication *app,
QList<QString> hitSequenceIDs;
QList<QByteArray> hitSequences;

// Formats a value for the results table, substituting "N/A" when the value
// signals "not available":
//   * double  - NaN is reported as "N/A", anything else as a plain number;
//   * SciNot  - "N/A" when toDouble() is NaN, otherwise asString(false);
//   * others  - negative values are reported as "N/A", anything else as a
//               plain number.
auto maybeNA = [](auto val) -> QString {
    // decltype is the standard spelling; typeof is a GNU-only extension.
    using ValT = decltype(val);
    if constexpr (std::is_same_v<ValT, double>) {
        if (std::isnan(val))
            return "N/A";

        // A valid double must be printed here: without this return the
        // control flow used to reach a trailing "N/A" for every finite value.
        return QString::number(val);
    } else if constexpr (std::is_same_v<ValT, SciNot>) {
        if (std::isnan(val.toDouble()))
            return "N/A";

        return val.asString(false);
    } else {
        if (val < 0)
            return "N/A";

        return QString::number(val);
    }
};

for (const auto *query : g_blastSearch->queries()) {
unsigned num = 0;
for (const auto & queryPath : query->getPaths()) {
Path path = queryPath.getPath();

tableOut << query->getName() << "\t"
<< path.getString(true) << "\t"
<< QString::number(path.getLength()) << "\t"
<< QString::number(100.0 * queryPath.getPathQueryCoverage()) << "%\t"
<< QString::number(100.0 * queryPath.getHitsQueryCoverage()) << "%\t"
<< QString::number(queryPath.getMeanHitPercIdentity()) << "%\t"
<< QString::number(queryPath.getTotalHitMismatches()) << "\t"
<< QString::number(queryPath.getTotalHitGapOpens()) << "\t"
<< QString::number(100.0 * queryPath.getRelativePathLength()) << "%\t"
<< queryPath.getAbsolutePathLengthDifferenceString(false) << "\t"
<< queryPath.getEvalueProduct().asString(false) << "\t";
tableOut << query->getName() << '\t'
<< path.getString(true) << '\t'
<< QString::number(path.getLength()) << '\t'
<< QString::number(queryPath.queryStart()) << '\t'
<< QString::number(queryPath.queryEnd()) << '\t'
<< QString::number(queryPath.getPathQueryCoverage()) << '\t'
<< QString::number(queryPath.getHitsQueryCoverage()) << '\t'
<< maybeNA(queryPath.getMeanHitPercIdentity()) << '\t'
<< maybeNA(queryPath.getTotalHitMismatches()) << '\t'
<< maybeNA(queryPath.getTotalHitGapOpens()) << '\t'
<< QString::number(queryPath.getRelativePathLength()) << '\t'
<< queryPath.getAbsolutePathLengthDifferenceString(false) << '\t'
<< maybeNA(queryPath.getEvalueProduct()) << '\t';

// If we are using a separate file for the path sequences, save the
// sequence along with its ID to save later, and store the ID here.
Expand Down Expand Up @@ -212,13 +241,14 @@ int handleQueryPathsCmd(QApplication *app,

out << "done" << Qt::endl;

out << Qt::endl << "Results: " + tableFilename << Qt::endl;
out << Qt::endl << "Results: " + tableFilename << Qt::endl;
if (cmd.m_pathFasta)
out << " " + pathFastaFilename << Qt::endl;
if (cmd.m_hitsFasta)
out << " " + hitsFastaFilename << Qt::endl;

out << Qt::endl << "Summary: Total BLAST queries: " << g_blastSearch->getQueryCount() << Qt::endl;
out << " Total hits: " << g_blastSearch->getNumHits() << Qt::endl;
out << " Queries with found paths: " << g_blastSearch->getQueryCountWithAtLeastOnePath() << Qt::endl;
out << " Total query paths: " << g_blastSearch->getQueryPathCount() << Qt::endl;

Expand Down
1 change: 1 addition & 0 deletions command_line/querypaths.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ struct QueryPathsCmd {
std::string m_prefix;
bool m_pathFasta = false;
bool m_hitsFasta = false;
bool m_gfaPaths = false;
};

CLI::App *addQueryPathsSubcommand(CLI::App &app,
Expand Down
27 changes: 13 additions & 14 deletions graph/assemblygraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@
#include <QSet>

#include <algorithm>
#include <iterator>
#include <limits>
#include <cmath>
#include <utility>
#include <deque>

AssemblyGraph::AssemblyGraph()
: m_kmer(0),
m_sequencesLoadedFromFasta(NOT_READY)
: m_sequencesLoadedFromFasta(NOT_READY)
{
clearGraphInfo();
}
Expand Down Expand Up @@ -296,7 +296,6 @@ void AssemblyGraph::determineGraphInfo()
m_edgeCount = edgeCount;
m_totalLength = totalLength;
m_meanDepth = getMeanDepth();
m_pathCount = m_deBruijnGraphPaths.size();

std::sort(nodeDepths.begin(), nodeDepths.end());

Expand Down Expand Up @@ -412,17 +411,17 @@ bool AssemblyGraph::loadCSV(const QString &filename, QStringList *columns, QStri

std::vector<DeBruijnNode *> nodes;
// See if this is a path name
{
QString pathName = nodeName;
if (g_settings->multyGraphMode)
pathName = QString::number(getGraphId()) + "_" + nodeName;
auto pathIt = m_deBruijnGraphPaths.find(pathName.toStdString());
if (pathIt != m_deBruijnGraphPaths.end()) {
for (auto *node : (*pathIt)->nodes()) {
nodes.emplace_back(node);
if (!g_settings->doubleMode)
nodes.emplace_back(node->getReverseComplement());
}
QString pathName = nodeName;
if (g_settings->multyGraphMode)
pathName = QString::number(getGraphId()) + "_" + nodeName;
// Match using unique prefix of path name. This allows us to load segmented SPAdes
// scaffold paths (e.g. NODE_1_foo_1) and assign CSV data to all of them
for (auto range = m_deBruijnGraphPaths.equal_prefix_range(pathName.toStdString());
range.first != range.second; ++range.first) {
for (auto *node : (*range.first)->nodes()) {
nodes.emplace_back(node);
if (!g_settings->doubleMode)
nodes.emplace_back(node->getReverseComplement());
}
}

Expand Down
4 changes: 2 additions & 2 deletions graph/assemblygraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,10 @@ class AssemblyGraph : public QObject

tsl::htrie_map<char, Path*> m_deBruijnGraphPaths;

int m_kmer;
int m_nodeCount;
int m_edgeCount;
int m_pathCount;
// Number of paths currently stored in m_deBruijnGraphPaths.
unsigned pathCount() const {
    return static_cast<unsigned>(m_deBruijnGraphPaths.size());
}

long long m_totalLength;
long long m_shortestContig;
long long m_longestContig;
Expand Down
4 changes: 4 additions & 0 deletions graph/assemblygraphbuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ namespace io {

static bool isStandardTag(const char name[2]) {
switch (makeTag(name)) {
case makeTag("dp"):
case makeTag("DP"):
case makeTag("LN"):
case makeTag("KC"):
Expand Down Expand Up @@ -331,6 +332,9 @@ namespace io {
if (auto dpTag = gfa::getTag<float>("DP", record.tags)) {
graph.m_depthTag = "DP";
nodeDepth = *dpTag;
} else if (auto dpTag = gfa::getTag<float>("dp", record.tags)) {
graph.m_depthTag = "DP";
nodeDepth = *dpTag;
} else if (auto kcTag = gfa::getTag<int64_t>("KC", record.tags)) {
graph.m_depthTag = "KC";
nodeDepth = double(*kcTag) / double(length);
Expand Down
12 changes: 3 additions & 9 deletions graph/assemblygraphlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,7 @@ class AssemblyGraphList
}
return m_graphMap[1];
}
//AssemblyGraph* last() { return m_graphList.last(); }
bool isEmpty() { return m_graphMap.isEmpty(); }
//auto begin() { return m_graphList.begin(); }
//auto begin() const { return m_graphList.begin(); }
//auto end() { return m_graphList.end(); }
//auto end() const { return m_graphList.end(); }
//void append(AssemblyGraph* graph) { m_graphList.append(graph); }
void clear() { m_graphMap.clear(); }
size_t size() const { return m_graphMap.size(); }

Expand All @@ -44,10 +38,10 @@ class AssemblyGraphList
}
return count;
}
int getPathCount() {
int count = 0;
unsigned getPathCount() {
unsigned count = 0;
for (auto graph : m_graphMap.values()) {
count += graph->m_pathCount;
count += graph->pathCount();
}
return count;
}
Expand Down
2 changes: 1 addition & 1 deletion graph/gfawriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ namespace gfa {
//We use the depthTag to guide how we save the node depth.
//If it is empty, that implies that the loaded graph did not have depth
//information and so we don't save depth.
if (depthTag == "DP")
if (depthTag == "DP" || depthTag == "dp")
gfaSegmentLine += "\tDP:f:" + QString::number(node->getDepth()).toLatin1();
else if (depthTag == "KC" || depthTag == "RC" || depthTag == "FC")
gfaSegmentLine += "\t" + depthTag.toLatin1() + ":i:" +
Expand Down
11 changes: 0 additions & 11 deletions graph/graphicsitemedgecommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,17 +128,6 @@ void GraphicsItemEdge::paint(QPainter * painter, const QStyleOptionGraphicsItem
}

void GraphicsItemEdge::remakePath() {
QPointF startLocation, beforeStartLocation, endLocation, afterEndLocation;
getControlPointLocations(m_deBruijnEdge,
startLocation, beforeStartLocation,
endLocation, afterEndLocation);
GraphicsItemEdgeCommon::calculateAndSetPath(
m_deBruijnEdge->getStartingNode(),
m_deBruijnEdge->getEndingNode(),
startLocation,
beforeStartLocation,
endLocation,
afterEndLocation);
}

void GraphicsItemEdge::getControlPointLocations(const DeBruijnEdge *edge,
Expand Down
65 changes: 65 additions & 0 deletions graph/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

#include <QFile>
#include <QTextStream>
#include <stdexcept>

namespace io {
bool loadGFAPaths(AssemblyGraph &graph,
Expand Down Expand Up @@ -178,4 +179,68 @@ namespace io {
return true;
}

// Reads a SPAdes-style paths file and registers the paths it describes in
// graph.m_deBruijnGraphPaths.
//
// Empty lines are skipped. The remaining lines alternate between:
//   * a path name line, which must start with "NODE_"
//     (e.g. NODE_1_length_348461_cov_16.477994);
//   * one or more segment lines with comma-separated node names; a trailing
//     ";" means that another segment of the same path follows.
//
// A path made of a single segment is stored under its name as-is. Segments
// of a multi-segment path are stored as <name>_1, <name>_2, ...
// Paths whose name ends with an apostrophe (the reverse-complementary ones)
// are parsed but not stored.
//
// Returns false when the file cannot be opened, true otherwise.
// Throws std::logic_error when a path name line lacks the "NODE_" prefix.
// NOTE(review): the node lookup via m_deBruijnGraphNodes.at() presumably
// throws for unknown node names - confirm against the container type.
bool loadSPAdesPaths(AssemblyGraph &graph,
                     QString fileName) {
    enum class Expect {
        Name,     // next non-empty line is a path name
        Segment,  // next non-empty line is a list of nodes
    };

    QFile file(fileName);
    if (!file.open(QIODevice::ReadOnly))
        return false;

    // Builds a Path out of comma-separated node names and stores it in the
    // graph under the given name.
    const auto storePath = [&graph](const std::string &name,
                                    const QString &nodeList) {
        std::vector<DeBruijnNode *> nodes;
        for (const auto &nodeName : nodeList.split(","))
            nodes.push_back(graph.m_deBruijnGraphNodes.at(nodeName.toStdString()));

        auto *newPath = new Path(Path::makeFromOrderedNodes(nodes, false));
        graph.m_deBruijnGraphPaths[name] = newPath;
    };

    QTextStream stream(&file);
    Expect expected = Expect::Name;
    std::string currentName;
    unsigned segmentNo = 1;

    while (!stream.atEnd()) {
        QByteArray text = stream.readLine().toLatin1();
        if (text.length() == 0)
            continue;

        if (expected == Expect::Name) {
            if (!text.startsWith("NODE_"))
                throw std::logic_error("invalid path name: does not start with NODE");

            currentName = text.toStdString();
            segmentNo = 1;
            expected = Expect::Segment;
            continue;
        }

        // Segment line: a trailing ";" announces one more segment.
        const bool moreSegmentsFollow = text.endsWith(";");
        if (moreSegmentsFollow)
            text = text.chopped(1);
        expected = moreSegmentsFollow ? Expect::Segment : Expect::Name;

        // Reverse-complementary paths are consumed but never stored.
        if (currentName.at(currentName.size() - 1) == '\'')
            continue;

        const bool singleSegmentPath = (segmentNo == 1) && !moreSegmentsFollow;
        if (singleSegmentPath) {
            // The only segment of the path keeps the original name.
            storePath(currentName, text);
        } else {
            storePath(currentName + "_" + std::to_string(segmentNo++), text);
        }
    }

    return true;
}

}
3 changes: 2 additions & 1 deletion graph/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,5 @@ namespace io {
bool loadGFAPaths(AssemblyGraph &graph, QString fileName);
bool loadGAFPaths(AssemblyGraph &graph, QString fileName);
bool loadSPAlignerPaths(AssemblyGraph &graph, QString fileName);
}
bool loadSPAdesPaths(AssemblyGraph &graph, QString fileName);
}
1 change: 1 addition & 0 deletions graphsearch/graphsearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class GraphSearch : public QObject {
size_t getQueryCountWithAtLeastOnePath() const { return m_queries.getQueryCountWithAtLeastOnePath(); }
size_t getQueryPathCount() const { return m_queries.getQueryPathCount(); }
size_t getQueryCount(QuerySequenceType sequenceType) const { return m_queries.getQueryCount(sequenceType); }
// Returns the hit count reported by m_queries.numHits().
size_t getNumHits() const { return m_queries.numHits(); }

static bool findProgram(const QString& programName, QString * command);

Expand Down
Loading

0 comments on commit 93bba6a

Please sign in to comment.