Skip to content

Commit

Permalink
pass registration update
Browse files Browse the repository at this point in the history
  • Loading branch information
Tobias Gysi committed Feb 13, 2020
1 parent 36c7222 commit 678f862
Show file tree
Hide file tree
Showing 19 changed files with 208 additions and 132 deletions.
54 changes: 28 additions & 26 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@ cmake_minimum_required(VERSION 3.12.4)

project(oec-opt LANGUAGES CXX)

# define options
option(OEC_GPU_LOWERING_ENABLED "description" ON)

find_package(LLVM REQUIRED CONFIG)

if(LLVM_FOUND)
Expand All @@ -15,17 +12,15 @@ if(LLVM_ENABLE_ZLIB)
find_package(ZLIB)
endif()

if(OEC_GPU_LOWERING_ENABLED)
include(CheckLanguage)
check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
enable_language(CUDA)
else()
message(SEND_ERROR
"Building the GPU lowering of oec-opt requires CUDA")
endif()
find_library(CUDA_RUNTIME_LIBRARY cuda)
include(CheckLanguage)
check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
enable_language(CUDA)
else()
message(SEND_ERROR
"Building the GPU lowering of oec-opt requires CUDA")
endif()
find_library(CUDA_RUNTIME_LIBRARY cuda)

# MLIR project.
set(MLIR_MAIN_SRC_DIR ${LLVM_INCLUDE_DIR}) # --src-root
Expand All @@ -36,13 +31,13 @@ set(MLIR_TABLEGEN_EXE mlir-tblgen)
set(LLVM_LIT_ARGS "-sv" CACHE STRING "lit default options")

# set the output directories
set( LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/bin )
set( LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/lib )
set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/bin )
set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/lib )

# import llvm functionality
list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
include(AddLLVM)
include(TableGen)
include(AddLLVM)

add_definitions(${LLVM_DEFINITIONS})
include_directories(${LLVM_INCLUDE_DIRS})
Expand All @@ -53,16 +48,23 @@ function(mlir_tablegen ofn)
PARENT_SCOPE)
endfunction()

# Force-link static libraries into `target` by wrapping them in
# -Wl,--whole-archive / --no-whole-archive so the linker keeps every
# object file (needed for libraries whose only entry points are static
# pass registrations).
#   whole_archive_link(<target> <lib>...)
function(whole_archive_link target)
  # currently not working for our passes (instead we added them to target link libraries)
  #add_dependencies(${target} ${ARGN})
  set(link_flags "-L${LLVM_LIBRARY_DIR} -L${CMAKE_BINARY_DIR}/lib -Wl,--whole-archive,")
  foreach(lib IN LISTS ARGN)
    string(APPEND link_flags "-l${lib},")
  endforeach()
  string(APPEND link_flags "--no-whole-archive")
  # Quote the value: it contains spaces and must stay a single property string.
  set_target_properties(${target} PROPERTIES LINK_FLAGS "${link_flags}")
endfunction()
# Register the TableGen rules for a dialect: generates the op class
# declarations/definitions from <dialect>.td and exposes a public
# target (MLIR<dialect>IncGen) that libraries can depend on.
# NOTE(review): dialect_doc_filename is accepted but never used —
# confirm whether -gen-op-doc generation was intended here.
function(add_mlir_dialect dialect dialect_doc_filename)
  set(LLVM_TARGET_DEFINITIONS ${dialect}.td)
  mlir_tablegen(${dialect}.h.inc -gen-op-decls)    # op class declarations
  mlir_tablegen(${dialect}.cpp.inc -gen-op-defs)   # op class definitions
  add_public_tablegen_target(MLIR${dialect}IncGen)
endfunction()

#function(whole_archive_link target)
# # currently not working for our passes (instead we added them to target link libraries)
# #add_dependencies(${target} ${ARGN})
# set(link_flags "-L${LLVM_LIBRARY_DIR} -L${CMAKE_BINARY_DIR}/lib -Wl,--whole-archive,")
# FOREACH(LIB ${ARGN})
# string(CONCAT link_flags ${link_flags} "-l${LIB},")
# ENDFOREACH(LIB)
# string(CONCAT link_flags ${link_flags} "--no-whole-archive")
# set_target_properties(${target} PROPERTIES LINK_FLAGS ${link_flags})
#endfunction(whole_archive_link)

add_subdirectory(include/)
add_subdirectory(lib/)
Expand Down
11 changes: 9 additions & 2 deletions include/Conversion/KernelToCUDA/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,21 @@
#define MLIR_CONVERSION_KERNELTOCUDA_PASSES_H

#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/Module.h"

#include "mlir/Pass/Pass.h"

namespace mlir {
namespace stencil {

std::unique_ptr<OpPassBase<LLVM::LLVMFuncOp>>
createIndexOptimizationPass();
std::unique_ptr<OpPassBase<ModuleOp>> createLaunchFuncToCUDACallsPass();

std::unique_ptr<OpPassBase<LLVM::LLVMFuncOp>> createIndexOptimizationPass();

void createGPUToCubinPipeline(OpPassManager &pm);

} // namespace stencil
} // namespace mlir

#endif // MLIR_CONVERSION_KERNELTOCUDA_PASSES_H
17 changes: 17 additions & 0 deletions include/Conversion/StencilToStandard/Passes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef MLIR_CONVERSION_STENCILTOSTANDARD_PASSES_H
#define MLIR_CONVERSION_STENCILTOSTANDARD_PASSES_H

#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Module.h"

#include "mlir/Pass/Pass.h"

// Include what we use: std::unique_ptr is referenced directly below.
#include <memory>

namespace mlir {
namespace stencil {

/// Creates a module pass that lowers stencil dialect operations to the
/// standard dialect.
std::unique_ptr<OpPassBase<ModuleOp>> createConvertStencilToStandardPass();

} // namespace stencil
} // namespace mlir

#endif // MLIR_CONVERSION_STENCILTOSTANDARD_PASSES_H
9 changes: 1 addition & 8 deletions include/Dialect/Stencil/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1 @@
set(LLVM_TARGET_DEFINITIONS StencilOps.td)

# Generate the class interfaces
mlir_tablegen(StencilOps.h.inc -gen-op-decls)
# Generate the actual implementation
mlir_tablegen(StencilOps.cpp.inc -gen-op-defs)

add_public_tablegen_target(MLIRStencilOpsIncGen)
add_mlir_dialect(StencilOps StencilOps)
10 changes: 6 additions & 4 deletions include/Dialect/Stencil/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
namespace mlir {
namespace stencil {

std::unique_ptr<OpPassBase<mlir::ModuleOp>>
createConvertStencilToStandardPass();
std::unique_ptr<OpPassBase<ModuleOp>> createCallInliningPass();

std::unique_ptr<OpPassBase<LLVM::LLVMFuncOp>>
createIndexOptimizationPass();
std::unique_ptr<OpPassBase<FuncOp>> createStencilInliningPass();

std::unique_ptr<OpPassBase<FuncOp>> createShapeShiftPass();

std::unique_ptr<OpPassBase<FuncOp>> createShapeInferencePass();

} // namespace stencil
} // namespace mlir
Expand Down
29 changes: 14 additions & 15 deletions lib/Conversion/KernelToCUDA/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@

if(OEC_GPU_LOWERING_ENABLED)
set(LIBS

MLIRIR
MLIRStandardOps
)

add_llvm_library(OECGPUtoCUDATransforms
ConvertLaunchFuncToCUDACalls.cpp
ConvertKernelFuncToCubin.cpp
IndexOptimizationPass.cpp
set(LIBS
MLIRGPU
MLIRLLVMIR
MLIRNVVMIR
MLIRPass
MLIRTargetNVVMIR
)

target_link_libraries(OECGPUtoCUDATransforms ${LIBS})
add_llvm_library(OECGPUtoCUDATransforms
ConvertLaunchFuncToCUDACalls.cpp
ConvertKernelFuncToCubin.cpp
IndexOptimizationPass.cpp
)

target_include_directories(OECGPUtoCUDATransforms PUBLIC "${PROJECT_SOURCE_DIR}/include")
target_include_directories(OECGPUtoCUDATransforms PUBLIC "${PROJECT_BINARY_DIR}/include")
endif()
target_link_libraries(OECGPUtoCUDATransforms ${LIBS})
target_include_directories(OECGPUtoCUDATransforms PUBLIC "${PROJECT_SOURCE_DIR}/include")
target_include_directories(OECGPUtoCUDATransforms PUBLIC "${PROJECT_BINARY_DIR}/include")
14 changes: 5 additions & 9 deletions lib/Conversion/KernelToCUDA/ConvertKernelFuncToCubin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,18 +83,14 @@ OwnedCubin compilePtxToCubin(const std::string &ptx, Location loc,
return result;
}

void pipelineBuilder(OpPassManager &pm) {
} // namespace

void mlir::stencil::createGPUToCubinPipeline(OpPassManager &pm) {
pm.addPass(createGpuKernelOutliningPass());
auto &kernelPm = pm.nest<gpu::GPUModuleOp>();
kernelPm.addPass(createStripDebugInfoPass());
kernelPm.addPass(createLowerGpuOpsToNVVMOpsPass());
kernelPm.addPass(createIndexOptimizationPass());
kernelPm.addPass(stencil::createIndexOptimizationPass());
kernelPm.addPass(createConvertGPUKernelToCubinPass(&compilePtxToCubin));
pm.addPass(createLowerToLLVMPass(false, false, true));
pm.addPass(createLowerToLLVMPass(false, false, true));
}

} // namespace

static PassPipelineRegistration<>
pipeline("stencil-gpu-to-cubin", "Lowering of stencil kernels to cubins",
pipelineBuilder);
14 changes: 10 additions & 4 deletions lib/Conversion/KernelToCUDA/ConvertLaunchFuncToCUDACalls.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include "Conversion/KernelToCUDA/Passes.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Attributes.h"
Expand Down Expand Up @@ -453,12 +454,13 @@ LaunchFuncToCUDACallsPass::declareSetupFunc(LLVM::LLVMFuncOp parentOp,
}

// Put individual components of a memref descriptor into the flat argument
// list. We cannot use unpackMemref from LLVM lowering here because we have
// no access to MemRefType that had been lowered away.
// list. We cannot use unpackMemref from LLVM lowering here because we
// have no access to MemRefType that had been lowered away.
for (int32_t j = 0, ej = llvmType.getStructNumElements(); j < ej; ++j) {
auto elemType = llvmType.getStructElementType(j);
if (elemType.isArrayTy()) {
for (int32_t k = 0, ek = elemType.getArrayNumElements(); k < ek; ++k) {
for (int32_t k = 0, ek = elemType.getArrayNumElements(); k < ek;
++k) {
Value elem = builder.create<LLVM::ExtractValueOp>(
loc, elemType.getArrayElementType(), operand,
builder.getI32ArrayAttr({j, k}));
Expand All @@ -467,7 +469,7 @@ LaunchFuncToCUDACallsPass::declareSetupFunc(LLVM::LLVMFuncOp parentOp,
} else {
assert((elemType.isIntegerTy() || elemType.isFloatTy() ||
elemType.isDoubleTy() || elemType.isPointerTy()) &&
"expected scalar type");
"expected scalar type");
Value strct = builder.create<LLVM::ExtractValueOp>(
loc, elemType, operand, builder.getI32ArrayAttr(j));
addParamToList(builder, loc, strct, one);
Expand Down Expand Up @@ -611,6 +613,10 @@ LaunchFuncToCUDACallsPass::declareRunFunc(LLVM::LLVMFuncOp parentOp,
return success();
}

/// Creates a module pass that converts kernel launches into CUDA runtime
/// calls (registered below as "stencil-gpu-to-cuda").
std::unique_ptr<OpPassBase<ModuleOp>>
mlir::stencil::createLaunchFuncToCUDACallsPass() {
  return std::make_unique<LaunchFuncToCUDACallsPass>();
}

static PassRegistration<LaunchFuncToCUDACallsPass>
pass("stencil-gpu-to-cuda",
"Convert all kernel launches to CUDA runtime calls");
3 changes: 1 addition & 2 deletions lib/Conversion/KernelToCUDA/IndexOptimizationPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,7 @@ void IndexOptimizationPass::runOnOperation() {

} // namespace

std::unique_ptr<OpPassBase<LLVM::LLVMFuncOp>>
mlir::createIndexOptimizationPass() {
/// Creates the index optimization pass, which runs on LLVM functions
/// (used on GPU kernels in the cubin lowering pipeline).
std::unique_ptr<OpPassBase<LLVM::LLVMFuncOp>>
mlir::stencil::createIndexOptimizationPass() {
  return std::make_unique<IndexOptimizationPass>();
}

Expand Down
11 changes: 10 additions & 1 deletion lib/Conversion/StencilToStandard/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
set(LIBS

MLIREDSC
MLIRIR
MLIRStandardOps
)
MLIRStencil
MLIRLLVMIR
MLIRLoopToStandard
MLIRStandardToLLVM
MLIRTransformUtils
LLVMSupport)

add_llvm_library(MLIRStencilToStandard
ConvertStencilToStandard.cpp

ADDITIONAL_HEADER_DIRS
${PROJECT_SOURCE_DIR}/include/Conversion/StencilToStandard
)
target_link_libraries(MLIRStencilToStandard ${LIBS})

Expand Down
11 changes: 6 additions & 5 deletions lib/Conversion/StencilToStandard/ConvertStencilToStandard.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "Conversion/StencilToStandard/Passes.h"
#include "Conversion/StencilToStandard/ConvertStencilToStandard.h"
#include "Dialect/Stencil/Passes.h"
#include "Dialect/Stencil/StencilDialect.h"
#include "Dialect/Stencil/StencilOps.h"
#include "Dialect/Stencil/StencilTypes.h"
Expand Down Expand Up @@ -259,7 +259,8 @@ class LoadOpLowering : public ConversionPattern {

// Compute the replacement types
auto inputType = loadOp.field().getType().cast<MemRefType>();
SmallVector<int64_t, 3> shape = computeShape(loadOp.getLB(), loadOp.getUB());
SmallVector<int64_t, 3> shape =
computeShape(loadOp.getLB(), loadOp.getUB());
SmallVector<int64_t, 3> strides = computeStrides(inputType.getShape());
auto outputType = computeMemRefType(inputType.getElementType(), shape,
strides, loadOp.getLB(), rewriter);
Expand Down Expand Up @@ -445,7 +446,8 @@ class StoreOpLowering : public ConversionPattern {

// Compute the replacement types
auto inputType = storeOp.field().getType().cast<MemRefType>();
SmallVector<int64_t, 3> shape = computeShape(storeOp.getLB(), storeOp.getUB());
SmallVector<int64_t, 3> shape =
computeShape(storeOp.getLB(), storeOp.getUB());
SmallVector<int64_t, 3> strides = computeStrides(inputType.getShape());
auto outputType = computeMemRefType(inputType.getElementType(), shape,
strides, storeOp.getLB(), rewriter);
Expand Down Expand Up @@ -520,8 +522,7 @@ void mlir::populateStencilToStandardConversionPatterns(
AccessOpLowering, StoreOpLowering, ReturnOpLowering>(ctx);
}

std::unique_ptr<OpPassBase<ModuleOp>>
mlir::stencil::createConvertStencilToStandardPass() {
/// Creates a module pass that lowers the stencil dialect to the standard
/// dialect using the conversion patterns registered above.
std::unique_ptr<OpPassBase<ModuleOp>>
mlir::stencil::createConvertStencilToStandardPass() {
  return std::make_unique<StencilToStandardPass>();
}

Expand Down
16 changes: 10 additions & 6 deletions lib/Dialect/Stencil/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,25 +1,29 @@
set(LIBS


MLIREDSC
MLIRIR
MLIRStandardOps
)
MLIRSupport
MLIRTransformUtils
LLVMSupport)

add_llvm_library(MLIRStencil
DialectRegistration.cpp
StencilDialect.cpp
StencilOps.cpp
StencilTypes.cpp
CallInliningPass.cpp
StencilInliningPass.cpp
ShapeInferencePass.cpp
ShapeShiftPass.cpp

ADDITIONAL_HEADER_DIRS
${PROJECT_SOURCE_DIR}/include/Dialect/Stencil
)

# Make sure that the TableGen generated files are up-to-date
add_dependencies(MLIRStencil

${LIBS}
MLIRStencilOpsIncGen)
MLIRStencilOpsIncGen
${LIBS})
target_link_libraries(MLIRStencil ${LIBS})

# specify header directories
Expand Down
13 changes: 9 additions & 4 deletions lib/Dialect/Stencil/CallInliningPass.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include "Dialect/Stencil/Passes.h"
#include "Dialect/Stencil/StencilDialect.h"
#include "Dialect/Stencil/StencilOps.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
Expand Down Expand Up @@ -70,13 +71,13 @@ struct CallInliningPass : public ModulePass<CallInliningPass> {

void CallInliningPass::runOnModule() {
ModuleOp moduleOp = getModule();

// Walk the body of all stencil functions and apply ops and inline the calls
moduleOp.walk([](Operation* op) {
if(auto funcOp = dyn_cast<FuncOp>(*op))
moduleOp.walk([](Operation *op) {
if (auto funcOp = dyn_cast<FuncOp>(*op))
if (stencil::StencilDialect::isStencilFunction(funcOp))
funcOp.walk([](stencil::CallOp callOp) { inlineCalls(callOp); });
if(auto applyOp = dyn_cast<stencil::ApplyOp>(*op))
if (auto applyOp = dyn_cast<stencil::ApplyOp>(*op))
applyOp.walk([](stencil::CallOp callOp) { inlineCalls(callOp); });
});

Expand All @@ -89,5 +90,9 @@ void CallInliningPass::runOnModule() {

} // namespace

/// Creates the pass that inlines stencil function calls (registered
/// below as "stencil-call-inlining").
std::unique_ptr<OpPassBase<ModuleOp>> mlir::stencil::createCallInliningPass() {
  return std::make_unique<CallInliningPass>();
}

static PassRegistration<CallInliningPass> pass("stencil-call-inlining",
"Inline stencil function calls");
Loading

0 comments on commit 678f862

Please sign in to comment.