Skip to content

Commit

Permalink
pass registration update
Browse files Browse the repository at this point in the history
  • Loading branch information
Tobias Gysi committed Feb 13, 2020
1 parent 36c7222 commit 678f862
Show file tree
Hide file tree
Showing 19 changed files with 208 additions and 132 deletions.
54 changes: 28 additions & 26 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@ cmake_minimum_required(VERSION 3.12.4)

project(oec-opt LANGUAGES CXX)

# define options
option(OEC_GPU_LOWERING_ENABLED "description" ON)

find_package(LLVM REQUIRED CONFIG)

if(LLVM_FOUND)
Expand All @@ -15,17 +12,15 @@ if(LLVM_ENABLE_ZLIB)
find_package(ZLIB)
endif()

if(OEC_GPU_LOWERING_ENABLED)
include(CheckLanguage)
check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
enable_language(CUDA)
else()
message(SEND_ERROR
"Building the GPU lowering of oec-opt requires CUDA")
endif()
find_library(CUDA_RUNTIME_LIBRARY cuda)
include(CheckLanguage)
check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
enable_language(CUDA)
else()
message(SEND_ERROR
"Building the GPU lowering of oec-opt requires CUDA")
endif()
find_library(CUDA_RUNTIME_LIBRARY cuda)

# MLIR project.
set(MLIR_MAIN_SRC_DIR ${LLVM_INCLUDE_DIR}) # --src-root
Expand All @@ -36,13 +31,13 @@ set(MLIR_TABLEGEN_EXE mlir-tblgen)
set(LLVM_LIT_ARGS "-sv" CACHE STRING "lit default options")

# set the output directories
set( LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/bin )
set( LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/lib )
set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/bin )
set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/lib )

# import llvm functionality
list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
include(AddLLVM)
include(TableGen)
include(AddLLVM)

add_definitions(${LLVM_DEFINITIONS})
include_directories(${LLVM_INCLUDE_DIRS})
Expand All @@ -53,16 +48,23 @@ function(mlir_tablegen ofn)
PARENT_SCOPE)
endfunction()

# Force-link static libraries into `target` by wrapping them in
# -Wl,--whole-archive / --no-whole-archive so the linker keeps every
# object file (needed for libraries whose only entry points are static
# pass registrations).
#   whole_archive_link(<target> <lib>...)
function(whole_archive_link target)
  # currently not working for our passes (instead we added them to target link libraries)
  #add_dependencies(${target} ${ARGN})
  set(link_flags "-L${LLVM_LIBRARY_DIR} -L${CMAKE_BINARY_DIR}/lib -Wl,--whole-archive,")
  foreach(lib IN LISTS ARGN)
    string(APPEND link_flags "-l${lib},")
  endforeach()
  string(APPEND link_flags "--no-whole-archive")
  # Quote the value: it contains spaces and must stay a single property string.
  set_target_properties(${target} PROPERTIES LINK_FLAGS "${link_flags}")
endfunction()
# Register the TableGen rules for a dialect: generates the op class
# declarations/definitions from <dialect>.td and exposes a public
# target (MLIR<dialect>IncGen) that libraries can depend on.
# NOTE(review): dialect_doc_filename is accepted but never used —
# confirm whether -gen-op-doc generation was intended here.
function(add_mlir_dialect dialect dialect_doc_filename)
  set(LLVM_TARGET_DEFINITIONS ${dialect}.td)
  mlir_tablegen(${dialect}.h.inc -gen-op-decls)    # op class declarations
  mlir_tablegen(${dialect}.cpp.inc -gen-op-defs)   # op class definitions
  add_public_tablegen_target(MLIR${dialect}IncGen)
endfunction()

#function(whole_archive_link target)
# # currently not working for our passes (instead we added them to target link libraries)
# #add_dependencies(${target} ${ARGN})
# set(link_flags "-L${LLVM_LIBRARY_DIR} -L${CMAKE_BINARY_DIR}/lib -Wl,--whole-archive,")
# FOREACH(LIB ${ARGN})
# string(CONCAT link_flags ${link_flags} "-l${LIB},")
# ENDFOREACH(LIB)
# string(CONCAT link_flags ${link_flags} "--no-whole-archive")
# set_target_properties(${target} PROPERTIES LINK_FLAGS ${link_flags})
#endfunction(whole_archive_link)

add_subdirectory(include/)
add_subdirectory(lib/)
Expand Down
11 changes: 9 additions & 2 deletions include/Conversion/KernelToCUDA/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,21 @@
#define MLIR_CONVERSION_KERNELTOCUDA_PASSES_H

#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/Module.h"

#include "mlir/Pass/Pass.h"

namespace mlir {
namespace stencil {

std::unique_ptr<OpPassBase<LLVM::LLVMFuncOp>>
createIndexOptimizationPass();
std::unique_ptr<OpPassBase<ModuleOp>> createLaunchFuncToCUDACallsPass();

std::unique_ptr<OpPassBase<LLVM::LLVMFuncOp>> createIndexOptimizationPass();

void createGPUToCubinPipeline(OpPassManager &pm);

} // namespace stencil
} // namespace mlir

#endif // MLIR_CONVERSION_KERNELTOCUDA_PASSES_H
17 changes: 17 additions & 0 deletions include/Conversion/StencilToStandard/Passes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef MLIR_CONVERSION_STENCILTOSTANDARD_PASSES_H
#define MLIR_CONVERSION_STENCILTOSTANDARD_PASSES_H

#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Module.h"

#include "mlir/Pass/Pass.h"

// Include what we use: std::unique_ptr is referenced directly below.
#include <memory>

namespace mlir {
namespace stencil {

/// Creates a module pass that lowers stencil dialect operations to the
/// standard dialect.
std::unique_ptr<OpPassBase<ModuleOp>> createConvertStencilToStandardPass();

} // namespace stencil
} // namespace mlir

#endif // MLIR_CONVERSION_STENCILTOSTANDARD_PASSES_H
9 changes: 1 addition & 8 deletions include/Dialect/Stencil/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1 @@
set(LLVM_TARGET_DEFINITIONS StencilOps.td)

# Generate the class interfaces
mlir_tablegen(StencilOps.h.inc -gen-op-decls)
# Generate the actual implementation
mlir_tablegen(StencilOps.cpp.inc -gen-op-defs)

add_public_tablegen_target(MLIRStencilOpsIncGen)
add_mlir_dialect(StencilOps StencilOps)
10 changes: 6 additions & 4 deletions include/Dialect/Stencil/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
namespace mlir {
namespace stencil {

std::unique_ptr<OpPassBase<mlir::ModuleOp>>
createConvertStencilToStandardPass();
std::unique_ptr<OpPassBase<ModuleOp>> createCallInliningPass();

std::unique_ptr<OpPassBase<LLVM::LLVMFuncOp>>
createIndexOptimizationPass();
std::unique_ptr<OpPassBase<FuncOp>> createStencilInliningPass();

std::unique_ptr<OpPassBase<FuncOp>> createShapeShiftPass();

std::unique_ptr<OpPassBase<FuncOp>> createShapeInferencePass();

} // namespace stencil
} // namespace mlir
Expand Down
29 changes: 14 additions & 15 deletions lib/Conversion/KernelToCUDA/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@

if(OEC_GPU_LOWERING_ENABLED)
set(LIBS

MLIRIR
MLIRStandardOps
)

add_llvm_library(OECGPUtoCUDATransforms
ConvertLaunchFuncToCUDACalls.cpp
ConvertKernelFuncToCubin.cpp
IndexOptimizationPass.cpp
set(LIBS
MLIRGPU
MLIRLLVMIR
MLIRNVVMIR
MLIRPass
MLIRTargetNVVMIR
)

target_link_libraries(OECGPUtoCUDATransforms ${LIBS})
add_llvm_library(OECGPUtoCUDATransforms
ConvertLaunchFuncToCUDACalls.cpp
ConvertKernelFuncToCubin.cpp
IndexOptimizationPass.cpp
)

target_include_directories(OECGPUtoCUDATransforms PUBLIC "${PROJECT_SOURCE_DIR}/include")
target_include_directories(OECGPUtoCUDATransforms PUBLIC "${PROJECT_BINARY_DIR}/include")
endif()
target_link_libraries(OECGPUtoCUDATransforms ${LIBS})
target_include_directories(OECGPUtoCUDATransforms PUBLIC "${PROJECT_SOURCE_DIR}/include")
target_include_directories(OECGPUtoCUDATransforms PUBLIC "${PROJECT_BINARY_DIR}/include")
14 changes: 5 additions & 9 deletions lib/Conversion/KernelToCUDA/ConvertKernelFuncToCubin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,18 +83,14 @@ OwnedCubin compilePtxToCubin(const std::string &ptx, Location loc,
return result;
}

void pipelineBuilder(OpPassManager &pm) {
} // namespace

void mlir::stencil::createGPUToCubinPipeline(OpPassManager &pm) {
pm.addPass(createGpuKernelOutliningPass());
auto &kernelPm = pm.nest<gpu::GPUModuleOp>();
kernelPm.addPass(createStripDebugInfoPass());
kernelPm.addPass(createLowerGpuOpsToNVVMOpsPass());
kernelPm.addPass(createIndexOptimizationPass());
kernelPm.addPass(stencil::createIndexOptimizationPass());
kernelPm.addPass(createConvertGPUKernelToCubinPass(&compilePtxToCubin));
pm.addPass(createLowerToLLVMPass(false, false, true));
pm.addPass(createLowerToLLVMPass(false, false, true));
}

} // namespace

static PassPipelineRegistration<>
pipeline("stencil-gpu-to-cubin", "Lowering of stencil kernels to cubins",
pipelineBuilder);
14 changes: 10 additions & 4 deletions lib/Conversion/KernelToCUDA/ConvertLaunchFuncToCUDACalls.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include "Conversion/KernelToCUDA/Passes.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Attributes.h"
Expand Down Expand Up @@ -453,12 +454,13 @@ LaunchFuncToCUDACallsPass::declareSetupFunc(LLVM::LLVMFuncOp parentOp,
}

// Put individual components of a memref descriptor into the flat argument
// list. We cannot use unpackMemref from LLVM lowering here because we have
// no access to MemRefType that had been lowered away.
// list. We cannot use unpackMemref from LLVM lowering here because we
// have no access to MemRefType that had been lowered away.
for (int32_t j = 0, ej = llvmType.getStructNumElements(); j < ej; ++j) {
auto elemType = llvmType.getStructElementType(j);
if (elemType.isArrayTy()) {
for (int32_t k = 0, ek = elemType.getArrayNumElements(); k < ek; ++k) {
for (int32_t k = 0, ek = elemType.getArrayNumElements(); k < ek;
++k) {
Value elem = builder.create<LLVM::ExtractValueOp>(
loc, elemType.getArrayElementType(), operand,
builder.getI32ArrayAttr({j, k}));
Expand All @@ -467,7 +469,7 @@ LaunchFuncToCUDACallsPass::declareSetupFunc(LLVM::LLVMFuncOp parentOp,
} else {
assert((elemType.isIntegerTy() || elemType.isFloatTy() ||
elemType.isDoubleTy() || elemType.isPointerTy()) &&
"expected scalar type");
"expected scalar type");
Value strct = builder.create<LLVM::ExtractValueOp>(
loc, elemType, operand, builder.getI32ArrayAttr(j));
addParamToList(builder, loc, strct, one);
Expand Down Expand Up @@ -611,6 +613,10 @@ LaunchFuncToCUDACallsPass::declareRunFunc(LLVM::LLVMFuncOp parentOp,
return success();
}

/// Creates a module pass that converts kernel launches into CUDA runtime
/// calls (registered below as "stencil-gpu-to-cuda").
std::unique_ptr<OpPassBase<ModuleOp>>
mlir::stencil::createLaunchFuncToCUDACallsPass() {
  return std::make_unique<LaunchFuncToCUDACallsPass>();
}

static PassRegistration<LaunchFuncToCUDACallsPass>
pass("stencil-gpu-to-cuda",
"Convert all kernel launches to CUDA runtime calls");
3 changes: 1 addition & 2 deletions lib/Conversion/KernelToCUDA/IndexOptimizationPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,7 @@ void IndexOptimizationPass::runOnOperation() {

} // namespace

std::unique_ptr<OpPassBase<LLVM::LLVMFuncOp>>
mlir::createIndexOptimizationPass() {
/// Creates the index optimization pass, which runs on LLVM functions
/// (used on GPU kernels in the cubin lowering pipeline).
std::unique_ptr<OpPassBase<LLVM::LLVMFuncOp>>
mlir::stencil::createIndexOptimizationPass() {
  return std::make_unique<IndexOptimizationPass>();
}

Expand Down
11 changes: 10 additions & 1 deletion lib/Conversion/StencilToStandard/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
set(LIBS

MLIREDSC
MLIRIR
MLIRStandardOps
)
MLIRStencil
MLIRLLVMIR
MLIRLoopToStandard
MLIRStandardToLLVM
MLIRTransformUtils
LLVMSupport)

add_llvm_library(MLIRStencilToStandard
ConvertStencilToStandard.cpp

ADDITIONAL_HEADER_DIRS
${PROJECT_SOURCE_DIR}/include/Conversion/StencilToStandard
)
target_link_libraries(MLIRStencilToStandard ${LIBS})

Expand Down
11 changes: 6 additions & 5 deletions lib/Conversion/StencilToStandard/ConvertStencilToStandard.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "Conversion/StencilToStandard/Passes.h"
#include "Conversion/StencilToStandard/ConvertStencilToStandard.h"
#include "Dialect/Stencil/Passes.h"
#include "Dialect/Stencil/StencilDialect.h"
#include "Dialect/Stencil/StencilOps.h"
#include "Dialect/Stencil/StencilTypes.h"
Expand Down Expand Up @@ -259,7 +259,8 @@ class LoadOpLowering : public ConversionPattern {

// Compute the replacement types
auto inputType = loadOp.field().getType().cast<MemRefType>();
SmallVector<int64_t, 3> shape = computeShape(loadOp.getLB(), loadOp.getUB());
SmallVector<int64_t, 3> shape =
computeShape(loadOp.getLB(), loadOp.getUB());
SmallVector<int64_t, 3> strides = computeStrides(inputType.getShape());
auto outputType = computeMemRefType(inputType.getElementType(), shape,
strides, loadOp.getLB(), rewriter);
Expand Down Expand Up @@ -445,7 +446,8 @@ class StoreOpLowering : public ConversionPattern {

// Compute the replacement types
auto inputType = storeOp.field().getType().cast<MemRefType>();
SmallVector<int64_t, 3> shape = computeShape(storeOp.getLB(), storeOp.getUB());
SmallVector<int64_t, 3> shape =
computeShape(storeOp.getLB(), storeOp.getUB());
SmallVector<int64_t, 3> strides = computeStrides(inputType.getShape());
auto outputType = computeMemRefType(inputType.getElementType(), shape,
strides, storeOp.getLB(), rewriter);
Expand Down Expand Up @@ -520,8 +522,7 @@ void mlir::populateStencilToStandardConversionPatterns(
AccessOpLowering, StoreOpLowering, ReturnOpLowering>(ctx);
}

std::unique_ptr<OpPassBase<ModuleOp>>
mlir::stencil::createConvertStencilToStandardPass() {
/// Creates a module pass that lowers the stencil dialect to the standard
/// dialect using the conversion patterns registered above.
std::unique_ptr<OpPassBase<ModuleOp>>
mlir::stencil::createConvertStencilToStandardPass() {
  return std::make_unique<StencilToStandardPass>();
}

Expand Down
16 changes: 10 additions & 6 deletions lib/Dialect/Stencil/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,25 +1,29 @@
set(LIBS


MLIREDSC
MLIRIR
MLIRStandardOps
)
MLIRSupport
MLIRTransformUtils
LLVMSupport)

add_llvm_library(MLIRStencil
DialectRegistration.cpp
StencilDialect.cpp
StencilOps.cpp
StencilTypes.cpp
CallInliningPass.cpp
StencilInliningPass.cpp
ShapeInferencePass.cpp
ShapeShiftPass.cpp

ADDITIONAL_HEADER_DIRS
${PROJECT_SOURCE_DIR}/include/Dialect/Stencil
)

# Make sure that the TableGen generated files are up-to-date
add_dependencies(MLIRStencil

${LIBS}
MLIRStencilOpsIncGen)
MLIRStencilOpsIncGen
${LIBS})
target_link_libraries(MLIRStencil ${LIBS})

# specify header directories
Expand Down
13 changes: 9 additions & 4 deletions lib/Dialect/Stencil/CallInliningPass.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include "Dialect/Stencil/Passes.h"
#include "Dialect/Stencil/StencilDialect.h"
#include "Dialect/Stencil/StencilOps.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
Expand Down Expand Up @@ -70,13 +71,13 @@ struct CallInliningPass : public ModulePass<CallInliningPass> {

void CallInliningPass::runOnModule() {
ModuleOp moduleOp = getModule();

// Walk the body of all stencil functions and apply ops and inline the calls
moduleOp.walk([](Operation* op) {
if(auto funcOp = dyn_cast<FuncOp>(*op))
moduleOp.walk([](Operation *op) {
if (auto funcOp = dyn_cast<FuncOp>(*op))
if (stencil::StencilDialect::isStencilFunction(funcOp))
funcOp.walk([](stencil::CallOp callOp) { inlineCalls(callOp); });
if(auto applyOp = dyn_cast<stencil::ApplyOp>(*op))
if (auto applyOp = dyn_cast<stencil::ApplyOp>(*op))
applyOp.walk([](stencil::CallOp callOp) { inlineCalls(callOp); });
});

Expand All @@ -89,5 +90,9 @@ void CallInliningPass::runOnModule() {

} // namespace

/// Creates the pass that inlines stencil function calls (registered
/// below as "stencil-call-inlining").
std::unique_ptr<OpPassBase<ModuleOp>> mlir::stencil::createCallInliningPass() {
  return std::make_unique<CallInliningPass>();
}

static PassRegistration<CallInliningPass> pass("stencil-call-inlining",
"Inline stencil function calls");
Loading

0 comments on commit 678f862

Please sign in to comment.