adhithadias · adhithadias · Sep 20, 2023 · Sep 20, 2023 · Nov 10, 2023 · Nov 13, 2023
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -11,10 +11,10 @@ project(taco
 )
 option(CUDA "Build for NVIDIA GPU (CUDA must be preinstalled)" OFF)
 option(PYTHON "Build TACO for python environment" OFF)
-option(OPENMP "Build with OpenMP execution support" OFF)
+option(OPENMP "Build with OpenMP execution support" ON)
 option(COVERAGE "Build with code coverage analysis" OFF)
 set(TACO_FEATURE_CUDA 0)
-set(TACO_FEATURE_OPENMP 0)
+set(TACO_FEATURE_OPENMP 1)
 set(TACO_FEATURE_PYTHON 0)
 if(CUDA)
   message("-- Searching for CUDA Installation")

diff --git a/include/taco/index_notation/index_notation.h b/include/taco/index_notation/index_notation.h
@@ -1325,6 +1325,8 @@ std::vector<TensorVar> getAttrQueryResults(IndexStmt stmt);
 /// Returns the temporaries in the index statement, in the order they appear.
 std::map<Forall, std::vector<Where> > getTemporaryLocations(IndexStmt stmt);
 
+void getWhereTempsToResult(IndexStmt stmt, std::map<TensorVar, const AccessNode *>& _whereTempsToResult); // Fills the map: where-temporary -> LHS access of an order > 0 assignment found in that where's consumer.
+
 /// Returns the results in the index statement that should be assembled by 
 /// ungrouped insertion.
 std::vector<TensorVar> getAssembledByUngroupedInsertion(IndexStmt stmt);

diff --git a/src/codegen/codegen_c.cpp b/src/codegen/codegen_c.cpp
@@ -34,9 +34,9 @@ const string cHeaders =
   "#include <math.h>\n"
   "#include <complex.h>\n"
   "#include <string.h>\n"
-  "#if _OPENMP\n"
+  // "#if _OPENMP\n"
   "#include <omp.h>\n"
-  "#endif\n"
+  // "#endif\n"
   "#define TACO_MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))\n"
   "#define TACO_MAX(_a,_b) ((_a) > (_b) ? (_a) : (_b))\n"
   "#define TACO_DEREF(_a) (((___context___*)(*__ctx__))->_a)\n"
@@ -277,6 +277,8 @@ void CodeGen_C::compile(Stmt stmt, bool isFirst) {
   }
   out << endl;
   // generate code for the Stmt
+  // std::cout << "generating code for statement" << std::endl;
+  // std::cout << stmt << std::endl;
   stmt.accept(this);
 }
 
@@ -328,6 +330,16 @@ void CodeGen_C::visit(const Function* func) {
         << endl;
   }
 
+  // out << "\tchar * val;" << endl;
+  // out << "\tval = getenv( \"OMP_SCHEDULE\" );" << endl;
+  // out << "\tprintf(\"OMP_SCHEDULE: %s\\n\", val);" << endl;
+  // out << "\tomp_sched_t existingSched;\n";
+  // out << "\tint existingChunkSize;\n";
+  // out << "\tomp_get_schedule(&existingSched, &existingChunkSize);\n";
+  // out << "\tprintf(\"existingSched: %d\\n\", existingSched);\n";
+  // out << "\tprintf(\"existingChunkSize: %d\\n\", existingChunkSize);\n";
+  // out << "\tprintf(\"num_threads: %d\\n\", omp_get_max_threads());\n";
+
   // output body
   print(func->body);
 

diff --git a/src/codegen/module.cpp b/src/codegen/module.cpp
@@ -18,6 +18,9 @@
 
 using namespace std;
 
+// #define USE_OPENMP
+// #undef TACO_DEBUG 
+
 namespace taco {
 namespace ir {
 
@@ -134,9 +137,10 @@ string Module::compile() {
     string defaultFlags = "-O3 -ffast-math -std=c99";
 #endif
     cflags = util::getFromEnv("TACO_CFLAGS", defaultFlags) + " -shared -fPIC";
-#if USE_OPENMP
+// #if USE_OPENMP
+    // cout << "Using OpenMP $$" << endl;
     cflags += " -fopenmp";
-#endif
+// #endif
     file_ending = ".c";
     shims_file = "";
   }
@@ -145,7 +149,7 @@ string Module::compile() {
     prefix + file_ending + " " + shims_file + " " + 
     "-o " + fullpath + " -lm";
 
-  // std::cout << "Compiling generated code with command:\n" << cmd << "\n";
+  // std::cout << "Compiling generated code with command: " << cmd << "\n";
 
   // open the output file & write out the source
   compileToSource(tmpdir, libname);

diff --git a/src/index_notation/index_notation.cpp b/src/index_notation/index_notation.cpp
@@ -3475,6 +3475,32 @@ bool allForFreeLoopsBeforeAllReductionLoops(IndexStmt stmt) {
     return true;
   }
 
+void getWhereTempsToResult(IndexStmt stmt, std::map<TensorVar, const AccessNode *>& _whereTempsToResult) { // Out-param: for each where-temporary in stmt, records the LHS access of an order > 0 assignment in the where's consumer.
+  struct TemporaryLocsGetter : public IndexNotationVisitor { // Local visitor that only customizes the handling of WhereNode.
+    std::map<TensorVar, const AccessNode *>& whereTempsToResult; // Reference to the caller's map; results are written straight into it.
+    // Binds the visitor to the caller-owned map (no copy is made).
+    TemporaryLocsGetter(std::map<TensorVar, const AccessNode *>& _whereTempsToResult) : whereTempsToResult(_whereTempsToResult) {}
+    // Keep the base-class visit overloads visible next to the WhereNode overload below.
+    using IndexNotationVisitor::visit;
+    // For one where statement: look at the assignments in its consumer and record a result access for its temporary.
+    void visit(const WhereNode *op) {
+      Where where = Where(op);
+      TensorVar temporary = where.getTemporary(); // Map key for this where statement.
+      // Only assignments inside the consumer side are inspected here.
+      match(where.getConsumer(), 
+      std::function<void(const AssignmentNode*)>([&](const AssignmentNode* op) { // NOTE: this `op` shadows the WhereNode `op` of the enclosing visit().
+          if (op->lhs.getTensorVar().getOrder() > 0 && whereTempsToResult[temporary] == NULL) { // Skip order-0 LHS; operator[] default-inserts a null entry, so an existing non-null entry is never overwritten.
+            whereTempsToResult[temporary] = (const AccessNode *) op->lhs.ptr; // NOTE(review): C-style cast assumes lhs.ptr points to an AccessNode - confirm.
+          }
+      })
+      );
+      IndexNotationVisitor::visit(op); // Delegate to the base visitor (presumably to continue into nested statements - confirm).
+    }
+  };
+  TemporaryLocsGetter getter(_whereTempsToResult);
+  getter.visit(stmt); // Start the traversal at the given statement.
+}
+
 std::map<Forall, vector<Where> > getTemporaryLocations(IndexStmt stmt) {
   struct TemporaryLocsGetter : public IndexNotationVisitor {
     map<Forall, vector<Where> > temporaryLocs;
@@ -3512,6 +3538,9 @@ std::map<Forall, vector<Where> > getTemporaryLocations(IndexStmt stmt) {
 
 
 std::vector<TensorVar> getTemporaries(IndexStmt stmt) {
+  // std::cout << "getTemporaries" << std::endl;
+  // std::cout << "stmt: " << stmt << std::endl;
+
   vector<TensorVar> temporaries;
   bool firstAssignment = true;
   match(stmt,