From 737332d21d086f2a230f98380aa184cb96f17f6e Mon Sep 17 00:00:00 2001
From: Adhhitha Dias
Date: Tue, 7 Mar 2023 17:07:29 -0500
Subject: [PATCH] fix producer-consumer interchange

When the producer is at the end of the assignment, the arguments passed to
the generated compute kernel must be packed in the operand order of the
transformed index statement, not in the order of the original assignment.
---
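[ Note: a minimal usage sketch of the new compute(IndexStmt) overload,
  distilled from the loopreversefuse test added below. loopfuse() and its
  path argument are this fork's scheduling extensions, and the tensor
  fills are abbreviated; this is illustrative, not part of the commit. ]

    #include "taco.h"
    using namespace taco;

    int N = 16;
    Tensor<double> A("A", {N, N}, Format{Dense, Dense});
    Tensor<double> B("B", {N, N}, Format{Dense, Sparse});
    Tensor<double> C("C", {N, N}, Format{Dense, Dense});
    Tensor<double> D("D", {N, N}, Format{Dense, Dense});
    Tensor<double> E("E", {N, N}, Format{Dense, Dense});
    // ... insert values into B, C, D, E, then B.pack() ...

    IndexVar i("i"), j("j"), k("k"), l("l"), m("m");
    A(i,m) = B(i,j) * C(j,k) * D(k,l) * E(l,m);

    IndexStmt stmt = A.getAssignment().concretize();
    std::vector<int> path;                // empty path = fuse at the root
    stmt = stmt.reorder({m,k,l,i,j})      // producer now trails the consumer
               .loopfuse(2, false, path);

    A.compile(stmt);   // generate code for the transformed statement
    A.assemble();
    A.compute(stmt);   // new overload: packs arguments in the operand order
                       // of stmt, not of the original assignment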
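[ Note: why packArguments(tensor, stmt) threads a vector through
  getTensors(stmt, operands): a std::map iterates in key order and loses the
  order in which operands are first visited in the transformed statement,
  while the generated kernel expects its taco_tensor_t* parameters in exactly
  that visit order. A self-contained sketch of the idea, with illustrative
  names only (not TACO code): ]

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      std::map<std::string, int> args;  // keyed lookup; iterates sorted by name
      std::vector<std::string> order;   // order in which operands first appear

      for (const char* t : {"D", "B", "C"}) {  // visit order in the statement
        if (args.emplace(t, 0).second) {       // true only on first insertion
          order.push_back(t);
        }
      }

      for (const auto& t : order) std::cout << t << ' ';        // D B C
      std::cout << '\n';
      for (const auto& kv : args) std::cout << kv.first << ' '; // B C D
      std::cout << '\n';
    }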
 include/taco/tensor.h     |  1 +
 src/tensor.cpp            | 87 +++++++++++++++++++++++++++++++++++++++
 test/tests-workspaces.cpp | 58 +++++++++++++++++++++++++-
 3 files changed, 145 insertions(+), 1 deletion(-)

diff --git a/include/taco/tensor.h b/include/taco/tensor.h
index c462cbd32..75af68ba2 100644
--- a/include/taco/tensor.h
+++ b/include/taco/tensor.h
@@ -429,6 +429,7 @@ class TensorBase {
 
   /// Compute the given expression and put the values in the tensor storage.
   void compute();
+  void compute(IndexStmt stmt);
 
   /// Compile, assemble and compute as needed.
   void evaluate();
diff --git a/src/tensor.cpp b/src/tensor.cpp
index 257c396c3..eb4b4595a 100644
--- a/src/tensor.cpp
+++ b/src/tensor.cpp
@@ -775,6 +775,41 @@ static inline map<TensorVar, TensorBase> getTensors(const IndexExpr& expr) {
   return getOperands.arguments;
 }
 
+static inline map<TensorVar, TensorBase> getTensors(const IndexStmt& stmt, vector<TensorVar>& operands) {
+  struct GetOperands : public IndexNotationVisitor {
+    using IndexNotationVisitor::visit;
+    vector<TensorVar>& operands;
+    map<TensorVar, TensorBase> arguments;
+
+    GetOperands(vector<TensorVar>& operands) : operands(operands) {}
+
+    void visit(const AccessNode* node) {
+      if (!isa<AccessTensorNode>(node)) {
+        return; // temporary ignore
+      }
+      Access ac = Access(node);
+      taco_iassert(isa<AccessTensorNode>(node)) << "Unknown subexpression";
+
+      if (!util::contains(arguments, node->tensorVar)) {
+        arguments.insert({node->tensorVar, to<AccessTensorNode>(node)->tensor});
+        operands.push_back(node->tensorVar);
+      }
+
+      // Also add any tensors backing index sets of tensor accesses.
+      for (auto& p : node->indexSetModes) {
+        auto tv = p.second.tensor.getTensorVar();
+        if (!util::contains(arguments, tv)) {
+          arguments.insert({tv, p.second.tensor});
+          operands.push_back(tv);
+        }
+      }
+    }
+  };
+  GetOperands getOperands(operands);
+  stmt.accept(&getOperands);
+  return getOperands.arguments;
+}
+
 static inline
 vector<void*> packArguments(const TensorBase& tensor) {
   vector<void*> arguments;
@@ -805,6 +840,35 @@ vector<void*> packArguments(const TensorBase& tensor) {
   return arguments;
 }
 
+static inline
+vector<void*> packArguments(const TensorBase& tensor, const IndexStmt stmt) {
+  vector<void*> arguments;
+
+  // Pack the result tensor
+  arguments.push_back(tensor.getStorage());
+
+  // Pack any index sets on the result tensor at the front of the arguments list.
+  auto lhs = getNode(tensor.getAssignment().getLhs());
+  // We check isa<AccessNode> rather than isa<AccessTensorNode> to catch cases
+  // where the underlying access is represented with the base AccessNode class.
+  if (isa<AccessNode>(lhs)) {
+    auto indexSetModes = to<AccessNode>(lhs)->indexSetModes;
+    for (auto& it : indexSetModes) {
+      arguments.push_back(it.second.tensor.getStorage());
+    }
+  }
+
+  // Pack operand tensors
+  std::vector<TensorVar> operands;
+  auto tensors = getTensors(stmt, operands);
+  for (auto& operand : operands) {
+    taco_iassert(util::contains(tensors, operand));
+    arguments.push_back(tensors.at(operand).getStorage());
+  }
+
+  return arguments;
+}
+
 void TensorBase::assemble() {
   taco_uassert(!needsCompile()) << error::assemble_without_compile;
   if (!needsAssemble()) {
@@ -849,6 +913,29 @@ void TensorBase::compute() {
   }
 }
 
+void TensorBase::compute(IndexStmt stmt) {
+  taco_uassert(!needsCompile()) << error::compute_without_compile;
+  if (!needsCompute()) {
+    return;
+  }
+  setNeedsCompute(false);
+  // Sync operand tensors if needed.
+  auto operands = getTensors(getAssignment().getRhs());
+  for (auto& operand : operands) {
+    operand.second.syncValues();
+    operand.second.removeDependentTensor(*this);
+  }
+
+  auto arguments = packArguments(*this, stmt);
+  this->content->module->callFuncPacked("compute", arguments.data());
+
+  if (content->assembleWhileCompute) {
+    setNeedsAssemble(false);
+    taco_tensor_t* tensorData = ((taco_tensor_t*)arguments[0]);
+    content->valuesSize = unpackTensorData(*tensorData, *this);
+  }
+}
+
 void TensorBase::evaluate() {
   this->compile();
   if (!getAssignment().getOperator().defined()) {
diff --git a/test/tests-workspaces.cpp b/test/tests-workspaces.cpp
index ec084456e..2de6d2b40 100644
--- a/test/tests-workspaces.cpp
+++ b/test/tests-workspaces.cpp
@@ -652,6 +652,7 @@ TEST(workspaces, tile_dotProduct_3) {
 
 TEST(workspaces, loopfuse) {
   int N = 16;
+  float SPARSITY = 0.3;
   Tensor<double> A("A", {N, N}, Format{Dense, Dense});
   Tensor<double> B("B", {N, N}, Format{Dense, Sparse});
   Tensor<double> C("C", {N, N}, Format{Dense, Dense});
@@ -660,12 +661,16 @@ TEST(workspaces, loopfuse) {
 
   for (int i = 0; i < N; i++) {
     for (int j = 0; j < N; j++) {
-      B.insert({i, j}, (double) i);
+      float rand_float = (float) rand() / (float) RAND_MAX;
+      if (rand_float < SPARSITY)
+        B.insert({i, j}, (double) i);
       C.insert({i, j}, (double) j);
       E.insert({i, j}, (double) i*j);
       D.insert({i, j}, (double) i*j);
     }
   }
+  B.pack();
+  write("/home/min/a/kadhitha/workspace/my_taco/sparseSched/build/tensors/B.mtx", B);
 
   IndexVar i("i"), j("j"), k("k"), l("l"), m("m");
   A(i,m) = B(i,j) * C(j,k) * D(k,l) * E(l,m);
@@ -703,6 +708,57 @@ TEST(workspaces, loopfuse) {
 
 }
 
+TEST(workspaces, loopreversefuse) {
+  int N = 16;
+  float SPARSITY = 0.3;
+  Tensor<double> A("A", {N, N}, Format{Dense, Dense});
+  Tensor<double> B("B", {N, N}, Format{Dense, Sparse});
+  Tensor<double> C("C", {N, N}, Format{Dense, Dense});
+  Tensor<double> D("D", {N, N}, Format{Dense, Dense});
+  Tensor<double> E("E", {N, N}, Format{Dense, Dense});
+
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < N; j++) {
+      float rand_float = (float) rand() / (float) RAND_MAX;
+      if (rand_float < SPARSITY)
+        B.insert({i, j}, (double) rand_float);
+      C.insert({i, j}, (double) j);
+      E.insert({i, j}, (double) i*j);
+      D.insert({i, j}, (double) i*j);
+    }
+  }
+
+  IndexVar i("i"), j("j"), k("k"), l("l"), m("m");
+  A(i,m) = B(i,j) * C(j,k) * D(k,l) * E(l,m);
+
+  IndexStmt stmt = A.getAssignment().concretize();
+  std::cout << stmt << endl;
+  vector<int> path1;
+  stmt = stmt
+    .reorder({m,k,l,i,j})
+    .loopfuse(2, false, path1)
+    ;
+  stmt = stmt
+    .parallelize(m, ParallelUnit::CPUThread, OutputRaceStrategy::NoRaces)
+    ;
+
+  stmt = stmt.concretize();
+  cout << "final stmt: " << stmt << endl;
+  printCodeToFile("loopreversefuse", stmt);
+
+  A.compile(stmt);
+  B.pack();
+  A.assemble();
+  A.compute(stmt);
+
+  Tensor<double> expected("expected", {N, N}, Format{Dense, Dense});
+  expected(i,m) = B(i,j) * C(j,k) * D(k,l) * E(l,m);
+  expected.compile();
+  expected.assemble();
+  expected.compute();
+  ASSERT_TENSOR_EQ(expected, A);
+}
+
 TEST(workspaces, loopcontractfuse) {
   int N = 16;