fix producer-consumer interchange
When the producer is at the end of the assignment, the argument packing needs to change to match the transformed index statement.
adhithadias committed Mar 7, 2023
1 parent 270daf8 commit 737332d
Showing 3 changed files with 145 additions and 1 deletion.
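For context, a condensed sketch of the call sequence this commit enables, taken from the loopreversefuse test added below:

IndexStmt stmt = A.getAssignment().concretize();
vector<int> path1;
stmt = stmt.reorder({m,k,l,i,j}).loopfuse(2, false, path1); // schedule whose producer ends up at the end of the assignment
A.compile(stmt);  // generate code for the transformed statement
A.assemble();
A.compute(stmt);  // new overload: packs arguments per the transformed statement
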
1 change: 1 addition & 0 deletions include/taco/tensor.h
@@ -429,6 +429,7 @@ class TensorBase {

/// Compute the given expression and put the values in the tensor storage.
void compute();
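/// Compute the given index statement (e.g. after scheduling transformations) and put the values in the tensor storage.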
void compute(IndexStmt stmt);

/// Compile, assemble and compute as needed.
void evaluate();
87 changes: 87 additions & 0 deletions src/tensor.cpp
@@ -775,6 +775,41 @@ static inline map<TensorVar, TensorBase> getTensors(const IndexExpr& expr) {
return getOperands.arguments;
}

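// Gather the tensor operands of an index statement, appending each TensorVar to
// `operands` in first-visit order so that argument packing matches the order the
// generated kernel expects.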
static inline map<TensorVar, TensorBase> getTensors(const IndexStmt& stmt, vector<TensorVar>& operands) {
struct GetOperands : public IndexNotationVisitor {
using IndexNotationVisitor::visit;
vector<TensorVar>& operands;
map<TensorVar, TensorBase> arguments;

GetOperands(vector<TensorVar>& operands) : operands(operands) {}

void visit(const AccessNode* node) {
if (!isa<AccessTensorNode>(node)) {
return; // temporarily ignore non-tensor accesses
}

if (!util::contains(arguments, node->tensorVar)) {
arguments.insert({node->tensorVar, to<AccessTensorNode>(node)->tensor});
operands.push_back(node->tensorVar);
}

// Also add any tensors backing index sets of tensor accesses.
for (auto& p : node->indexSetModes) {
auto tv = p.second.tensor.getTensorVar();
if (!util::contains(arguments, tv)) {
arguments.insert({tv, p.second.tensor});
operands.push_back(tv);
}
}
}
};
GetOperands getOperands(operands);
stmt.accept(&getOperands);
return getOperands.arguments;
}

static inline
vector<void*> packArguments(const TensorBase& tensor) {
vector<void*> arguments;
@@ -805,6 +840,35 @@ vector<void*> packArguments(const TensorBase& tensor) {
return arguments;
}

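// Pack arguments following the operand order of the given index statement rather
// than that of the original assignment; after a producer-consumer interchange the
// two orders can differ.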
static inline
vector<void*> packArguments(const TensorBase& tensor, const IndexStmt stmt) {
vector<void*> arguments;

// Pack the result tensor
arguments.push_back(tensor.getStorage());

// Pack any index sets on the result tensor at the front of the arguments list.
auto lhs = getNode(tensor.getAssignment().getLhs());
// We check isa<AccessNode> rather than isa<AccessTensorNode> to catch cases
// where the underlying access is represented with the base AccessNode class.
if (isa<AccessNode>(lhs)) {
auto indexSetModes = to<AccessNode>(lhs)->indexSetModes;
for (auto& it : indexSetModes) {
arguments.push_back(it.second.tensor.getStorage());
}
}

// Pack operand tensors
std::vector<TensorVar> operands;
auto tensors = getTensors(stmt, operands);
for (auto& operand : operands) {
taco_iassert(util::contains(tensors, operand));
arguments.push_back(tensors.at(operand).getStorage());
}

return arguments;
}

void TensorBase::assemble() {
taco_uassert(!needsCompile()) << error::assemble_without_compile;
if (!needsAssemble()) {
@@ -849,6 +913,29 @@ void TensorBase::compute() {
}
}

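// Variant of compute() for an explicitly scheduled statement; the generated
// kernel is called with arguments packed to match stmt.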
void TensorBase::compute(IndexStmt stmt) {
taco_uassert(!needsCompile()) << error::compute_without_compile;
if (!needsCompute()) {
return;
}
setNeedsCompute(false);
// Sync operand tensors if needed.
auto operands = getTensors(getAssignment().getRhs());
for (auto& operand : operands) {
operand.second.syncValues();
operand.second.removeDependentTensor(*this);
}

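// Pack arguments in the operand order of the transformed statement, not the original assignment.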
auto arguments = packArguments(*this, stmt);
this->content->module->callFuncPacked("compute", arguments.data());

if (content->assembleWhileCompute) {
setNeedsAssemble(false);
taco_tensor_t* tensorData = ((taco_tensor_t*)arguments[0]);
content->valuesSize = unpackTensorData(*tensorData, *this);
}
}

void TensorBase::evaluate() {
this->compile();
if (!getAssignment().getOperator().defined()) {
58 changes: 57 additions & 1 deletion test/tests-workspaces.cpp
@@ -652,6 +652,7 @@ TEST(workspaces, tile_dotProduct_3) {

TEST(workspaces, loopfuse) {
int N = 16;
float SPARSITY = 0.3;
Tensor<double> A("A", {N, N}, Format{Dense, Dense});
Tensor<double> B("B", {N, N}, Format{Dense, Sparse});
Tensor<double> C("C", {N, N}, Format{Dense, Dense});
@@ -660,12 +661,16 @@

for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
float rand_float = (float) rand() / (float) RAND_MAX;
if (rand_float < SPARSITY)
B.insert({i, j}, (double) i);
C.insert({i, j}, (double) j);
E.insert({i, j}, (double) i*j);
D.insert({i, j}, (double) i*j);
}
}
B.pack();
write("/home/min/a/kadhitha/workspace/my_taco/sparseSched/build/tensors/B.mtx", B);

IndexVar i("i"), j("j"), k("k"), l("l"), m("m");
A(i,m) = B(i,j) * C(j,k) * D(k,l) * E(l,m);
@@ -703,6 +708,57 @@ TEST(workspaces, loopfuse) {
}


TEST(workspaces, loopreversefuse) {
int N = 16;
float SPARSITY = 0.3;
Tensor<double> A("A", {N, N}, Format{Dense, Dense});
Tensor<double> B("B", {N, N}, Format{Dense, Sparse});
Tensor<double> C("C", {N, N}, Format{Dense, Dense});
Tensor<double> D("D", {N, N}, Format{Dense, Dense});
Tensor<double> E("E", {N, N}, Format{Dense, Dense});

for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
float rand_float = (float) rand() / (float) RAND_MAX;
if (rand_float < SPARSITY)
B.insert({i, j}, (double) rand_float);
C.insert({i, j}, (double) j);
E.insert({i, j}, (double) i*j);
D.insert({i, j}, (double) i*j);
}
}

IndexVar i("i"), j("j"), k("k"), l("l"), m("m");
A(i,m) = B(i,j) * C(j,k) * D(k,l) * E(l,m);

IndexStmt stmt = A.getAssignment().concretize();

std::cout << stmt << endl;
vector<int> path1;
stmt = stmt
.reorder({m,k,l,i,j})
.loopfuse(2, false, path1)
;
stmt = stmt
.parallelize(m, ParallelUnit::CPUThread, OutputRaceStrategy::NoRaces)
;

stmt = stmt.concretize();
cout << "final stmt: " << stmt << endl;
printCodeToFile("loopreversefuse", stmt);

A.compile(stmt);
B.pack();
A.assemble();
A.compute(stmt);

Tensor<double> expected("expected", {N, N}, Format{Dense, Dense});
expected(i,m) = B(i,j) * C(j,k) * D(k,l) * E(l,m);
expected.compile();
expected.assemble();
expected.compute();
ASSERT_TENSOR_EQ(expected, A);
}

TEST(workspaces, loopcontractfuse) {
int N = 16;
