Skip to content

Commit

Permalink
Sparse matrix fix (#42)
Browse files Browse the repository at this point in the history
* Fix for sparse matrix clustering. Had to remove the sim = true flag, and other values were not being set properly

* Fix for column reader reading the last line twice.
  • Loading branch information
GregJohnsonJr authored Nov 7, 2024
1 parent 862ed7c commit f52e9b6
Show file tree
Hide file tree
Showing 7 changed files with 20 additions and 30 deletions.
2 changes: 1 addition & 1 deletion src/Adapters/DistanceFileReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class DistanceFileReader {
// We need to deduce the file type; the easy way to do that is to check whether the first item read is a number.
// Phylip files list the number of sequences at the top. We can use that to our advantage.
virtual bool Read(const std::string& filePath) {return false;}
DistanceFileReader(SparseDistanceMatrix*, ListVector*);
DistanceFileReader(SparseDistanceMatrix*, ListVector*, bool);
DistanceFileReader() = default;
// Phylip files do not need a count table
SparseDistanceMatrix* GetSparseMatrix() const {return new SparseDistanceMatrix(*sparseMatrix);}
Expand Down
19 changes: 4 additions & 15 deletions src/ColumnDistanceMatrixReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,15 @@ bool ColumnDistanceMatrixReader::Read(const std::string& filePath) {
fileHandle >> firstName;
fileHandle >> secondName;
fileHandle >> dist;

if(nameToIndexMap.find(firstName) != nameToIndexMap.end() ||
nameToIndexMap.find(secondName) != nameToIndexMap.end()) {
fileHandle.clear();
fileHandle.seekg(0, std::ifstream::beg);
}
while(fileHandle && lt == 1){ //let's assume it's a triangular matrix...

fileHandle >> firstName;
fileHandle >> secondName;
fileHandle >> distance; // get the row and column names and distance
while(fileHandle >> firstName >> secondName >> distance && lt == 1){ //let's assume it's a triangular matrix...
int itA;
int itB;

try {
itA = nameToIndexMap.at(firstName);
itB = nameToIndexMap.at(secondName);
Expand All @@ -68,7 +64,6 @@ bool ColumnDistanceMatrixReader::Read(const std::string& filePath) {
}
util.CheckForDistanceFileError(container);
}
// std::map<std::string,int>::iterator itB = nameMap->find(secondName);

if (util.isEqual(distance, -1)) { distance = 1000000; }
else if (sim) { distance = 1 - distance; } //user has entered a sim matrix that we need to convert.
Expand Down Expand Up @@ -108,19 +103,14 @@ bool ColumnDistanceMatrixReader::Read(const std::string& filePath) {
}
}

if(lt == 0){ // oops, it was square
if(lt == 0) { // oops, it was square
fileHandle.close(); //let's start over
sparseMatrix->clear(); //let's start over
fileHandle.open(filePath); //let's start over

while(fileHandle){
fileHandle >> firstName;
fileHandle >> secondName;
fileHandle >> distance; // get the row and column names and distance

while(fileHandle >> firstName >> secondName >> distance){
int itA;
int itB;

try {
itA = nameToIndexMap.at(firstName);
itB = nameToIndexMap.at(secondName);
Expand All @@ -145,7 +135,6 @@ bool ColumnDistanceMatrixReader::Read(const std::string& filePath) {
}
}
}

fileHandle.close();
list->setLabel("0");
return true;
Expand Down
2 changes: 1 addition & 1 deletion src/DistanceFileReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


DistanceFileReader::DistanceFileReader(SparseDistanceMatrix *sparseDistanceMatrix,
ListVector *listVector):sparseMatrix(sparseDistanceMatrix), list(listVector) {}
ListVector *listVector, const bool isSim):sparseMatrix(sparseDistanceMatrix), list(listVector), sim(isSim){}

Rcpp::DataFrame DistanceFileReader::SparseMatrixToDataFrame() const {
const size_t size = sparseMatrix->seqVec.size();
Expand Down
19 changes: 10 additions & 9 deletions src/MatrixAdapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,17 +83,18 @@ SparseDistanceMatrix MatrixAdapter::CreateSparseMatrix() {
const Utils util;
util.CheckForDistanceFileError(unknownNames);
}

matrixNames = std::vector<std::string>(nameSize);
for (int i = 0; i < nameSize; i++) {
positionsToNames[xPosition[i]] = countTable.GetNameByIndex(i); // Not going to work, I need a way to link my names to the sparse matix indices
positionsToNames[i] = countTable.GetNameByIndex(i);// Not going to work, I need a way to link my names to the sparse matrix indices
matrixNames[i] = positionsToNames[i];
}


matrixNames = std::vector<std::string>(nameSize);
for (int i = 0; i < nameSize; i++) {
positionsOfIndexs[xPosition[i]] = i;
matrixNames[i] = positionsToNames[xPosition[i]];
}

// for (int i = 0; i < nameSize; i++) {
// positionsOfIndexs[xPosition[i]] = i; // Position of indexes is incorrectly made
// matrixNames[i] = positionsToNames[i];
// }


for (int i = 0; i < nSeqs; i++) {
Expand All @@ -103,8 +104,8 @@ SparseDistanceMatrix MatrixAdapter::CreateSparseMatrix() {
if(currentDist < 0) {
currentDist = 0;
}
const int xIndex = positionsOfIndexs[xPosition[i]]; // Coming from r -> c++, indeces start at 1 in r
const int yIndex = positionsOfIndexs[yPosition[i]];
const int xIndex = xPosition[i]; // Coming from R -> C++, indices start at 1 in R
const int yIndex = yPosition[i];

// const double currentValueX = dataList[yIndex].rowValues[xIndex];
// if(currentValueX != 0){ // We already set the value and this is a sparse matrix.
Expand Down
4 changes: 2 additions & 2 deletions src/SparseMatrixTestFixture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ bool SparseMatrixTestFixture::TestRemoveCell(const unsigned long row, const unsi
const bool expectedResult) {
Setup();
unsigned long currentSize = 0;
if(row < sparseDistanceMatrix->seqVec.max_size())
if(row < sparseDistanceMatrix->seqVec.size())
currentSize = sparseDistanceMatrix->seqVec[row].size();
sparseDistanceMatrix->rmCell(row, col);
const bool result = sparseDistanceMatrix->seqVec[row].size() < currentSize;
const bool result = sparseDistanceMatrix->seqVec[row].size() == currentSize - 1;
TearDown();
return result == expectedResult;
}
Expand Down
2 changes: 1 addition & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ SEXP ProcessSparseMatrix(const std::vector<int> &xPosition,
countTableAdapter.CreateDataFrameMap(countTable);
MatrixAdapter adapter(xPosition, yPosition, data, cutoff, isSim, countTableAdapter);
auto* read = new DistanceFileReader(new SparseDistanceMatrix(adapter.CreateSparseMatrix()),
new ListVector(adapter.CreateListVector()));
new ListVector(adapter.CreateListVector()), isSim);
read->CreateCountTableAdapter(countTable);
return Rcpp::XPtr<DistanceFileReader>(read);
}
Expand Down
2 changes: 1 addition & 1 deletion src/test-sparse_matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ context("Spase Distance Matrix") {
}
test_that("RmCells remove cells correctly") {
SparseMatrixTestFixture fixture;
bool result = fixture.TestRemoveCell(1,1,true);
bool result = fixture.TestRemoveCell(1,0,true);
expect_true(result);
}
test_that("Test Adding Cells sorted works") {
Expand Down

0 comments on commit f52e9b6

Please sign in to comment.