diff --git a/include/taco/tensor.h b/include/taco/tensor.h
index c462cbd32..75af68ba2 100644
--- a/include/taco/tensor.h
+++ b/include/taco/tensor.h
@@ -429,6 +429,10 @@ class TensorBase {
 
   /// Compute the given expression and put the values in the tensor storage.
   void compute();
+
+  /// Compute the values into the tensor storage, packing the kernel operands
+  /// from the given statement. The tensor must already be compiled.
+  void compute(IndexStmt stmt);
 
   /// Compile, assemble and compute as needed.
   void evaluate();
diff --git a/src/tensor.cpp b/src/tensor.cpp
index 257c396c3..eb4b4595a 100644
--- a/src/tensor.cpp
+++ b/src/tensor.cpp
@@ -775,6 +775,44 @@ static inline map<TensorVar, TensorBase> getTensors(const IndexExpr& expr) {
   return getOperands.arguments;
 }
 
+/// Collect the tensors accessed by `stmt`. Returns a map from each tensor
+/// variable to its backing tensor, and appends the variables to `operands` in
+/// the order in which they are first visited.
+static inline map<TensorVar, TensorBase> getTensors(const IndexStmt& stmt,
+                                                    vector<TensorVar>& operands) {
+  struct GetOperands : public IndexNotationVisitor {
+    using IndexNotationVisitor::visit;
+    vector<TensorVar>& operands;
+    map<TensorVar, TensorBase> arguments;
+
+    explicit GetOperands(vector<TensorVar>& operands) : operands(operands) {}
+
+    void visit(const AccessNode* node) {
+      // Only AccessTensorNode carries a tensor; skip every other access.
+      if (!isa<AccessTensorNode>(node)) {
+        return; // temporary ignore
+      }
+
+      if (!util::contains(arguments, node->tensorVar)) {
+        arguments.insert({node->tensorVar, to<AccessTensorNode>(node)->tensor});
+        operands.push_back(node->tensorVar);
+      }
+
+      // Also add any tensors backing index sets of tensor accesses.
+      for (auto& p : node->indexSetModes) {
+        auto tv = p.second.tensor.getTensorVar();
+        if (!util::contains(arguments, tv)) {
+          arguments.insert({tv, p.second.tensor});
+          operands.push_back(tv);
+        }
+      }
+    }
+  };
+  GetOperands getOperands(operands);
+  stmt.accept(&getOperands);
+  return getOperands.arguments;
+}
+
 static inline
 vector<void*> packArguments(const TensorBase& tensor) {
   vector<void*> arguments;
@@ -805,6 +843,38 @@ vector<void*> packArguments(const TensorBase& tensor) {
   return arguments;
 }
 
+/// Pack the arguments passed to the compiled kernel when computing `tensor`
+/// with `stmt`: the result storage, then the storage of any index sets on the
+/// result, then the tensors accessed by `stmt` in first-visit order.
+static inline
+vector<void*> packArguments(const TensorBase& tensor, const IndexStmt& stmt) {
+  vector<void*> arguments;
+
+  // Pack the result tensor
+  arguments.push_back(tensor.getStorage());
+
+  // Pack any index sets on the result tensor at the front of the arguments list.
+  auto lhs = getNode(tensor.getAssignment().getLhs());
+  // We check isa<AccessNode> rather than isa<AccessTensorNode> to catch cases
+  // where the underlying access is represented with the base AccessNode class.
+  if (isa<AccessNode>(lhs)) {
+    auto indexSetModes = to<AccessNode>(lhs)->indexSetModes;
+    for (const auto& it : indexSetModes) {
+      arguments.push_back(it.second.tensor.getStorage());
+    }
+  }
+
+  // Pack operand tensors
+  std::vector<TensorVar> operands;
+  auto tensors = getTensors(stmt, operands);
+  for (const auto& operand : operands) {
+    taco_iassert(util::contains(tensors, operand));
+    arguments.push_back(tensors.at(operand).getStorage());
+  }
+
+  return arguments;
+}
+
 void TensorBase::assemble() {
   taco_uassert(!needsCompile()) << error::assemble_without_compile;
   if (!needsAssemble()) {
@@ -849,6 +919,34 @@ void TensorBase::compute() {
   }
 }
 
+void TensorBase::compute(IndexStmt stmt) {
+  taco_uassert(!needsCompile()) << error::compute_without_compile;
+  if (!needsCompute()) {
+    return;
+  }
+  setNeedsCompute(false);
+
+  // Sync operand tensors if needed. The operands come from this tensor's own
+  // assignment, while the kernel arguments below are packed from `stmt`.
+  // NOTE(review): a tensor accessed only by `stmt` is not synced here; confirm
+  // that `stmt` is always derived from this tensor's assignment.
+  auto operands = getTensors(getAssignment().getRhs());
+  for (auto& operand : operands) {
+    operand.second.syncValues();
+    operand.second.removeDependentTensor(*this);
+  }
+
+  auto arguments = packArguments(*this, stmt);
+  this->content->module->callFuncPacked("compute", arguments.data());
+
+  if (content->assembleWhileCompute) {
+    setNeedsAssemble(false);
+    // packArguments puts the result tensor's storage first.
+    taco_tensor_t* tensorData = ((taco_tensor_t*)arguments[0]);
+    content->valuesSize = unpackTensorData(*tensorData, *this);
+  }
+}
+
 void TensorBase::evaluate() {
   this->compile();
   if (!getAssignment().getOperator().defined()) {
diff --git a/test/tests-workspaces.cpp b/test/tests-workspaces.cpp
index ec084456e..e34935305 100644
--- a/test/tests-workspaces.cpp
+++ b/test/tests-workspaces.cpp
@@ -652,6 +652,7 @@ TEST(workspaces, tile_dotProduct_3) {
 
 TEST(workspaces, loopfuse) {
   int N = 16;
+  const float SPARSITY = 0.3f;
   Tensor<double> A("A", {N, N}, Format{Dense, Dense});
   Tensor<double> B("B", {N, N}, Format{Dense, Sparse});
   Tensor<double> C("C", {N, N}, Format{Dense, Dense});
@@ -660,7 +661,10 @@ TEST(workspaces, loopfuse) {
 
   for (int i = 0; i < N; i++) {
     for (int j = 0; j < N; j++) {
-      B.insert({i, j}, (double) i);
+      float rand_float = (float) rand() / (float) RAND_MAX;
+      if (rand_float < SPARSITY) {
+        B.insert({i, j}, (double) i);
+      }
       C.insert({i, j}, (double) j);
       E.insert({i, j}, (double) i*j);
       D.insert({i, j}, (double) i*j);
@@ -703,6 +707,63 @@ TEST(workspaces, loopfuse) {
 
 }
 
+// Checks that a reordered, loop-fused and parallelized schedule of a chained
+// matrix product yields the same tensor as computing without a custom schedule.
+TEST(workspaces, loopreversefuse) {
+  int N = 16;
+  const float SPARSITY = 0.3f;
+  Tensor<double> A("A", {N, N}, Format{Dense, Dense});
+  Tensor<double> B("B", {N, N}, Format{Dense, Sparse});
+  Tensor<double> C("C", {N, N}, Format{Dense, Dense});
+  Tensor<double> D("D", {N, N}, Format{Dense, Dense});
+  Tensor<double> E("E", {N, N}, Format{Dense, Dense});
+
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < N; j++) {
+      // Only a random subset of B's entries is inserted.
+      float rand_float = (float) rand() / (float) RAND_MAX;
+      if (rand_float < SPARSITY) {
+        B.insert({i, j}, (double) rand_float);
+      }
+      C.insert({i, j}, (double) j);
+      E.insert({i, j}, (double) i*j);
+      D.insert({i, j}, (double) i*j);
+    }
+  }
+
+  IndexVar i("i"), j("j"), k("k"), l("l"), m("m");
+  A(i,m) = B(i,j) * C(j,k) * D(k,l) * E(l,m);
+
+  IndexStmt stmt = A.getAssignment().concretize();
+
+  std::cout << stmt << endl;
+  vector<int> path1;
+  stmt = stmt
+    .reorder({m,k,l,i,j})
+    .loopfuse(2, false, path1)
+    ;
+  stmt = stmt
+    .parallelize(m, ParallelUnit::CPUThread, OutputRaceStrategy::NoRaces)
+    ;
+
+  stmt = stmt.concretize();
+  cout << "final stmt: " << stmt << endl;
+  printCodeToFile("loopreversefuse", stmt);
+
+  A.compile(stmt);
+  B.pack();
+  A.assemble();
+  A.compute(stmt);
+
+  // Reference result computed without a custom schedule.
+  Tensor<double> expected("expected", {N, N}, Format{Dense, Dense});
+  expected(i,m) = B(i,j) * C(j,k) * D(k,l) * E(l,m);
+  expected.compile();
+  expected.assemble();
+  expected.compute();
+  ASSERT_TENSOR_EQ(expected, A);
+}
+
 TEST(workspaces, loopcontractfuse) {
   int N = 16;