Skip to content

Commit

Permalink
minimal changes to support ispc exec
Browse files Browse the repository at this point in the history
  • Loading branch information
adhithadias committed Jun 28, 2021
1 parent 2edd480 commit 7d4b8b6
Show file tree
Hide file tree
Showing 10 changed files with 132 additions and 4 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,6 @@ CMakeCache.txt
doc

apps/tensor_times_vector/tensor_times_vector

.cache
compile_commands.json
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ project(taco
LANGUAGES C CXX
)
option(CUDA "Build for NVIDIA GPU (CUDA must be preinstalled)" OFF)
option(ISPC "Build for Intel ISPC Compiler (ISPC Compiler must be preinstalled)" OFF)
option(PYTHON "Build TACO for python environment" OFF)
option(OPENMP "Build with OpenMP execution support" OFF)
option(COVERAGE "Build with code coverage analysis" OFF)
set(TACO_FEATURE_CUDA 0)
set(TACO_FEATURE_ISPC 0)
set(TACO_FEATURE_OPENMP 0)
set(TACO_FEATURE_PYTHON 0)
if(CUDA)
Expand All @@ -22,6 +24,11 @@ if(CUDA)
add_definitions(-DCUDA_BUILT)
set(TACO_FEATURE_CUDA 1)
endif(CUDA)
if(ISPC)
  # Unlike the CUDA branch there is no find_package/search step here: the
  # ISPC compiler is only invoked at runtime by generated build commands,
  # so this block just records that the feature is enabled.
  message("-- Building with ISPC execution support (ISPC compiler must be on PATH)")
  add_definitions(-DISPC_BUILT)
  set(TACO_FEATURE_ISPC 1)
endif(ISPC)
if(OPENMP)
message("-- Will use OpenMP for parallel execution")
add_definitions(-DUSE_OPENMP)
Expand Down
10 changes: 10 additions & 0 deletions include/taco/cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,17 @@
#define CUDA_BUILT false
#endif

#ifndef ISPC_BUILT
#define ISPC_BUILT false
#endif

namespace taco {

/// Functions used by taco to interface with ISPC
bool should_use_ISPC_codegen();
void set_ISPC_codegen_enabled(bool enabled);


/// Functions used by taco to interface with CUDA (especially unified memory)
/// Check if should use CUDA codegen
bool should_use_CUDA_codegen();
Expand Down
1 change: 1 addition & 0 deletions include/taco/version.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@
#define TACO_FEATURE_OPENMP @TACO_FEATURE_OPENMP@
#define TACO_FEATURE_PYTHON @TACO_FEATURE_PYTHON@
#define TACO_FEATURE_CUDA @TACO_FEATURE_CUDA@
#define TACO_FEATURE_ISPC @TACO_FEATURE_ISPC@

#endif /* TACO_VERSION_H */
4 changes: 4 additions & 0 deletions src/codegen/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "taco/cuda.h"
#include "codegen_cuda.h"
#include "codegen_c.h"
#include "codegen_ispc.h"
#include <algorithm>
#include <unordered_set>

Expand All @@ -26,6 +27,9 @@ shared_ptr<CodeGen> CodeGen::init_default(std::ostream &dest, OutputKind outputK
if (should_use_CUDA_codegen()) {
return make_shared<CodeGen_CUDA>(dest, outputKind);
}
else if (should_use_ISPC_codegen()) {
return make_shared<CodeGen_ISPC>(dest, outputKind);
}
else {
return make_shared<CodeGen_C>(dest, outputKind);
}
Expand Down
4 changes: 2 additions & 2 deletions src/codegen/codegen_ispc.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef TACO_BACKEND_C_H
#define TACO_BACKEND_C_H
#ifndef TACO_BACKEND_ISPC_H
#define TACO_BACKEND_ISPC_H
#include <map>
#include <vector>

Expand Down
7 changes: 7 additions & 0 deletions src/codegen/module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "taco/util/strings.h"
#include "taco/util/env.h"
#include "codegen/codegen_c.h"
#include "codegen/codegen_ispc.h"
#include "codegen/codegen_cuda.h"
#include "taco/cuda.h"

Expand Down Expand Up @@ -89,6 +90,9 @@ void writeShims(vector<Stmt> funcs, string path, string prefix) {
if (should_use_CUDA_codegen()) {
CodeGen_CUDA::generateShim(func, shims);
}
else if (should_use_ISPC_codegen()) {
CodeGen_ISPC::generateShim(func, shims);
}
else {
CodeGen_C::generateShim(func, shims);
}
Expand All @@ -98,6 +102,9 @@ void writeShims(vector<Stmt> funcs, string path, string prefix) {
if (should_use_CUDA_codegen()) {
shims_file.open(path+prefix+"_shims.cpp");
}
else if (should_use_ISPC_codegen()) {
shims_file.open(path+prefix+".ispc", ios::app);
}
else {
shims_file.open(path+prefix+".c", ios::app);
}
Expand Down
11 changes: 11 additions & 0 deletions src/cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@

using namespace std;
namespace taco {

/// Runtime switch for ISPC code generation. Defaults to the compile-time
/// ISPC_BUILT flag (set via cmake -DISPC=ON), and can be toggled at runtime
/// (e.g. by the -ispc command-line flag in tools/taco.cpp).
static bool ISPC_codegen_enabled = ISPC_BUILT;

/// Returns true if taco should emit ISPC code instead of the default backend.
bool should_use_ISPC_codegen() {
  return ISPC_codegen_enabled;
}

/// Enables or disables ISPC code generation for subsequent lowering/codegen.
void set_ISPC_codegen_enabled(bool enabled) {
  ISPC_codegen_enabled = enabled;
}


/// Functions used by taco to interface with CUDA (especially unified memory)
static bool CUDA_codegen_enabled = CUDA_BUILT;
static bool CUDA_unified_memory_enabled = CUDA_BUILT;
Expand Down
70 changes: 68 additions & 2 deletions test/tests-scheduling-eval.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include <iostream>
#include <taco/index_notation/transformations.h>
#include <codegen/codegen_c.h>
#include <codegen/codegen_ispc.h>
#include <codegen/codegen_cuda.h>
#include <fstream>
#include "test.h"
Expand Down Expand Up @@ -44,6 +46,14 @@ IndexStmt scheduleSpMVCPU(IndexStmt stmt, int CHUNK_SIZE=16) {
.parallelize(i0, ParallelUnit::CPUThread, OutputRaceStrategy::NoRaces);
}

/// Placeholder ISPC schedule for SpMV: currently returns the statement
/// unchanged. CHUNK_SIZE is accepted (to match the other schedule helpers'
/// signatures) but has no effect until a real schedule is enabled.
IndexStmt scheduleSpMVISPC(IndexStmt stmt, int CHUNK_SIZE=16) {
  (void) CHUNK_SIZE;  // silence unused-parameter warnings
  // TODO: enable once the ISPC backend supports parallel schedules:
  //   IndexVar i0("i0"), i1("i1");
  //   return stmt.split(i, i0, i1, CHUNK_SIZE)
  //              .reorder({i0, i1, j})
  //              .parallelize(i0, ParallelUnit::CPUThread, OutputRaceStrategy::NoRaces);
  return stmt;
}

IndexStmt scheduleSpMMCPU(IndexStmt stmt, Tensor<double> A, int CHUNK_SIZE=16, int UNROLL_FACTOR=8) {
IndexVar i0("i0"), i1("i1"), kbounded("kbounded"), k0("k0"), k1("k1"), jpos("jpos"), jpos0("jpos0"), jpos1("jpos1");
return stmt.split(i, i0, i1, CHUNK_SIZE)
Expand Down Expand Up @@ -1463,7 +1473,63 @@ TEST(scheduling_eval, mttkrpGPU) {
ASSERT_TENSOR_EQ(expected, A);
}

TEST(generate_evaluation_files, DISABLED_cpu) {


/// Generates an ISPC SpMV kernel by lowering a CSR SpMV assignment with the
/// ISPC backend enabled, and writes the result under eval_prepared_ispc/.
TEST(generate_ispc_evaluation_files, ispc) {
  // Route codegen through the ISPC backend for the duration of this test.
  set_CUDA_codegen_enabled(false);
  set_ISPC_codegen_enabled(true);

  // One kernel variant is generated per chunk-size parameter set.
  vector<vector<int>> spmv_parameters = {{32}};

  int NUM_I = 100;
  int NUM_J = 100;

  string file_path = "eval_prepared_ispc/";
  mkdir(file_path.c_str(), 0777);

  // spmv: y(i) = A(i, j) * x(j) over a CSR matrix
  {
    stringstream source;
    std::shared_ptr<ir::CodeGen> codegen =
        ir::CodeGen::init_default(source, ir::CodeGen::ImplementationGen);
    Tensor<double> A("A", {NUM_I, NUM_J}, CSR);
    Tensor<double> x("x", {NUM_J}, {Dense});
    Tensor<double> y("y", {NUM_I}, {Dense});
    y(i) = A(i, j) * x(j);
    IndexStmt stmt = y.getAssignment().concretize();
    for (auto paramSet : spmv_parameters) {
      IndexStmt scheduled = scheduleSpMVISPC(stmt, paramSet[0]);
      ir::Stmt compute = lower(scheduled, "spmv_csr_ispc_taco", false, true);
      codegen->compile(compute, false);
    }
    // NOTE(review): the output is written with a .h extension even though the
    // backend emits ISPC source — confirm the intended extension.
    ofstream source_file;
    source_file.open(file_path + "spmv_csr_ispc_taco.h");
    source_file << source.str();
    source_file.close();
  }

  // Restore the build-time defaults so later tests (e.g.
  // generate_evaluation_files.cpu/gpu) are not forced through this backend.
  set_ISPC_codegen_enabled(false);
  set_CUDA_codegen_enabled(CUDA_BUILT);
}

TEST(generate_evaluation_files, cpu) {
if (should_use_CUDA_codegen()) {
return;
}
Expand Down Expand Up @@ -1779,7 +1845,7 @@ TEST(generate_evaluation_files, DISABLED_cpu) {
}
}

TEST(generate_evaluation_files, DISABLED_gpu) {
TEST(generate_evaluation_files, gpu) {
if (!should_use_CUDA_codegen()) {
return;
}
Expand Down
19 changes: 19 additions & 0 deletions tools/taco.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "taco/lower/lower.h"
#include "taco/codegen/module.h"
#include "codegen/codegen_c.h"
#include "codegen/codegen_ispc.h"
#include "codegen/codegen_cuda.h"
#include "codegen/codegen.h"
#include "taco/util/strings.h"
Expand Down Expand Up @@ -188,6 +189,8 @@ static void printUsageInfo() {
cout << endl;
printFlag("print-nocolor", "Print without colors.");
cout << endl;
printFlag("ispc", "Generate ISPC code for Intel CPUs");
cout << endl;
printFlag("cuda", "Generate CUDA code for NVIDIA GPUs");
cout << endl;
printFlag("schedule", "Specify parallel execution schedule");
Expand Down Expand Up @@ -279,6 +282,8 @@ static void printVersionInfo() {
cout << "Built with Python support." << endl;
if(TACO_FEATURE_CUDA)
cout << "Built with CUDA support." << endl;
if(TACO_FEATURE_ISPC)
cout << "Built with ISPC support." << endl;
cout << endl;
cout << "Built on: " << TACO_BUILD_DATE << endl;
cout << "CMake build type: " << TACO_BUILD_TYPE << endl;
Expand Down Expand Up @@ -641,6 +646,7 @@ int main(int argc, char* argv[]) {
bool color = true;
bool readKernels = false;
bool cuda = false;
bool ispc = false;

bool setSchedule = false;

Expand Down Expand Up @@ -949,6 +955,10 @@ int main(int argc, char* argv[]) {
else if ("-cuda" == argName) {
cuda = true;
}
else if ("-ispc" == argName) {
std::cout << "ispc true\n";
ispc = true;
}
else if ("-schedule" == argName) {
vector<string> descriptor = util::split(argValue, ",");
if (descriptor.size() > 2 || descriptor.empty()) {
Expand Down Expand Up @@ -1129,9 +1139,18 @@ int main(int argc, char* argv[]) {
return reportError("TACO must be built for CUDA (cmake -DCUDA=ON ..) to benchmark", 2);
}
set_CUDA_codegen_enabled(true);
set_ISPC_codegen_enabled(false);
}
else if (ispc) {
if (!ISPC_BUILT && benchmark) {
return reportError("TACO must be built for ISPC (cmake -DISPC=ON .. to benchmark", 2);
}
set_CUDA_codegen_enabled(false);
set_ISPC_codegen_enabled(true);
}
else {
set_CUDA_codegen_enabled(false);
set_ISPC_codegen_enabled(false);
}

stmt = scalarPromote(stmt);
Expand Down

0 comments on commit 7d4b8b6

Please sign in to comment.