Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixes: allocate additional temporaries #11

Open
wants to merge 10 commits into
base: sparsesched
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ project(taco
)
option(CUDA "Build for NVIDIA GPU (CUDA must be preinstalled)" OFF)
option(PYTHON "Build TACO for python environment" OFF)
option(OPENMP "Build with OpenMP execution support" OFF)
option(OPENMP "Build with OpenMP execution support" ON)
option(COVERAGE "Build with code coverage analysis" OFF)
set(TACO_FEATURE_CUDA 0)
set(TACO_FEATURE_OPENMP 0)
set(TACO_FEATURE_OPENMP 1)
set(TACO_FEATURE_PYTHON 0)
if(CUDA)
message("-- Searching for CUDA Installation")
Expand Down
2 changes: 2 additions & 0 deletions include/taco/index_notation/index_notation.h
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,8 @@ std::vector<TensorVar> getAttrQueryResults(IndexStmt stmt);
/// Returns the temporaries in the index statement, in the order they appear.
std::map<Forall, std::vector<Where> > getTemporaryLocations(IndexStmt stmt);

/// Fills `_whereTempsToResult` with, for each where-statement temporary in
/// `stmt`, the left-hand-side access of the first assignment found in that
/// where's consumer whose tensor has order greater than zero. Entries that
/// already hold a non-null access are left unchanged.
void getWhereTempsToResult(IndexStmt stmt, std::map<TensorVar, const AccessNode *>& _whereTempsToResult);

/// Returns the results in the index statement that should be assembled by
/// ungrouped insertion.
std::vector<TensorVar> getAssembledByUngroupedInsertion(IndexStmt stmt);
Expand Down
16 changes: 14 additions & 2 deletions src/codegen/codegen_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ const string cHeaders =
"#include <math.h>\n"
"#include <complex.h>\n"
"#include <string.h>\n"
"#if _OPENMP\n"
// "#if _OPENMP\n"
"#include <omp.h>\n"
"#endif\n"
// "#endif\n"
"#define TACO_MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))\n"
"#define TACO_MAX(_a,_b) ((_a) > (_b) ? (_a) : (_b))\n"
"#define TACO_DEREF(_a) (((___context___*)(*__ctx__))->_a)\n"
Expand Down Expand Up @@ -277,6 +277,8 @@ void CodeGen_C::compile(Stmt stmt, bool isFirst) {
}
out << endl;
// generate code for the Stmt
// std::cout << "generating code for statement" << std::endl;
// std::cout << stmt << std::endl;
stmt.accept(this);
}

Expand Down Expand Up @@ -328,6 +330,16 @@ void CodeGen_C::visit(const Function* func) {
<< endl;
}

// out << "\tchar * val;" << endl;
// out << "\tval = getenv( \"OMP_SCHEDULE\" );" << endl;
// out << "\tprintf(\"OMP_SCHEDULE: %s\\n\", val);" << endl;
// out << "\tomp_sched_t existingSched;\n";
// out << "\tint existingChunkSize;\n";
// out << "\tomp_get_schedule(&existingSched, &existingChunkSize);\n";
// out << "\tprintf(\"existingSched: %d\\n\", existingSched);\n";
// out << "\tprintf(\"existingChunkSize: %d\\n\", existingChunkSize);\n";
// out << "\tprintf(\"num_threads: %d\\n\", omp_get_max_threads());\n";

// output body
print(func->body);

Expand Down
10 changes: 7 additions & 3 deletions src/codegen/module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@

using namespace std;

// #define USE_OPENMP
// #undef TACO_DEBUG

namespace taco {
namespace ir {

Expand Down Expand Up @@ -134,9 +137,10 @@ string Module::compile() {
string defaultFlags = "-O3 -ffast-math -std=c99";
#endif
cflags = util::getFromEnv("TACO_CFLAGS", defaultFlags) + " -shared -fPIC";
#if USE_OPENMP
// #if USE_OPENMP
// cout << "Using OpenMP $$" << endl;
cflags += " -fopenmp";
#endif
// #endif
file_ending = ".c";
shims_file = "";
}
Expand All @@ -145,7 +149,7 @@ string Module::compile() {
prefix + file_ending + " " + shims_file + " " +
"-o " + fullpath + " -lm";

// std::cout << "Compiling generated code with command:\n" << cmd << "\n";
// std::cout << "Compiling generated code with command: " << cmd << "\n";

// open the output file & write out the source
compileToSource(tmpdir, libname);
Expand Down
29 changes: 29 additions & 0 deletions src/index_notation/index_notation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3475,6 +3475,32 @@ bool allForFreeLoopsBeforeAllReductionLoops(IndexStmt stmt) {
return true;
}

/// Records, for each where-statement temporary in `stmt`, the result access
/// that the where's consumer writes to.
///
/// Every `Where` node reachable from `stmt` is visited. For each one, the
/// consumer is searched for assignments whose left-hand-side tensor has order
/// greater than zero; the first such left-hand side encountered is stored in
/// `_whereTempsToResult` under the where's temporary. An entry that already
/// holds a non-null access (from an earlier node or from the caller) is never
/// overwritten.
///
/// @param stmt                 Index statement to traverse.
/// @param _whereTempsToResult  In/out map from temporary to result access.
///                             The stored pointers are raw and non-owning;
///                             presumably they stay valid only while the nodes
///                             of `stmt` are alive -- confirm against callers.
void getWhereTempsToResult(IndexStmt stmt, std::map<TensorVar, const AccessNode *>& _whereTempsToResult) {
  struct WhereTempResultCollector : public IndexNotationVisitor {
    std::map<TensorVar, const AccessNode *>& whereTempsToResult;

    explicit WhereTempResultCollector(std::map<TensorVar, const AccessNode *>& _whereTempsToResult)
        : whereTempsToResult(_whereTempsToResult) {}

    using IndexNotationVisitor::visit;

    void visit(const WhereNode *op) {
      Where where = Where(op);
      TensorVar temporary = where.getTemporary();

      match(where.getConsumer(),
        std::function<void(const AssignmentNode*)>([&](const AssignmentNode* assign) {
          // Only assignments into tensors of order > 0 are of interest.
          if (assign->lhs.getTensorVar().getOrder() <= 0) {
            return;
          }
          // Single lookup: operator[] default-inserts nullptr when the
          // temporary has no entry yet, and the first hit wins.
          const AccessNode*& result = whereTempsToResult[temporary];
          if (result == nullptr) {
            result = static_cast<const AccessNode*>(assign->lhs.ptr);
          }
        })
      );

      // Continue into the where's children so nested wheres are handled too.
      IndexNotationVisitor::visit(op);
    }
  };

  WhereTempResultCollector collector(_whereTempsToResult);
  collector.visit(stmt);
}

std::map<Forall, vector<Where> > getTemporaryLocations(IndexStmt stmt) {
struct TemporaryLocsGetter : public IndexNotationVisitor {
map<Forall, vector<Where> > temporaryLocs;
Expand Down Expand Up @@ -3512,6 +3538,9 @@ std::map<Forall, vector<Where> > getTemporaryLocations(IndexStmt stmt) {


std::vector<TensorVar> getTemporaries(IndexStmt stmt) {
// std::cout << "getTemporaries" << std::endl;
// std::cout << "stmt: " << stmt << std::endl;

vector<TensorVar> temporaries;
bool firstAssignment = true;
match(stmt,
Expand Down
Loading