Skip to content

Commit

Permalink
adding hsaco
Browse files Browse the repository at this point in the history
  • Loading branch information
Tobias Gysi committed Jun 10, 2020
1 parent 957f81b commit 3916f13
Show file tree
Hide file tree
Showing 6 changed files with 252 additions and 7 deletions.
12 changes: 12 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
include(TableGen)
include(AddLLVM)
include(AddMLIR)

include(HandleLLVMOptions)

include_directories(${LLVM_INCLUDE_DIRS})
Expand All @@ -32,9 +33,13 @@ link_directories(${LLVM_BUILD_LIBRARY_DIR})
add_definitions(${LLVM_DEFINITIONS})

# Backend switches, stored as cache variables. The CUDA backend defaults to
# enabled (1) and the ROCM backend defaults to disabled (0).
set(OEC_CUDA_BACKEND_ENABLED 1 CACHE BOOL "Enable building the oec CUDA backend")
set(OEC_ROCM_BACKEND_ENABLED 0 CACHE BOOL "Enable building the oec ROCM backend")
# Each enabled backend also defines a preprocessor macro of the same name so
# that sources can guard backend-specific code with #ifdef.
if(OEC_CUDA_BACKEND_ENABLED)
add_definitions(-DOEC_CUDA_BACKEND_ENABLED)
endif()
if(OEC_ROCM_BACKEND_ENABLED)
add_definitions(-DOEC_ROCM_BACKEND_ENABLED)
endif()

if (OEC_CUDA_BACKEND_ENABLED)
if (NOT ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD))
Expand All @@ -49,6 +54,13 @@ if (OEC_CUDA_BACKEND_ENABLED)
endif()
find_library(CUDA_RUNTIME_LIBRARY cuda)
endif()
# Prerequisites of the ROCM backend: the LLVM build must list the AMDGPU
# target, and an LLD package configuration must be available.
if (OEC_ROCM_BACKEND_ENABLED)
if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD))
message(SEND_ERROR "Building the oec ROCM backend requires AMDGPU")
endif()

# REQUIRED makes a missing LLD package a configuration error; CONFIG searches
# for a package configuration file only.
find_package(LLD REQUIRED CONFIG)
endif()

add_subdirectory(include)
add_subdirectory(lib)
Expand Down
1 change: 1 addition & 0 deletions include/Conversion/LoopsToCUDA/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ OwnedBlob compilePtxToCubin(const std::string &ptx, Location loc,
StringRef name);

void registerGPUToCUBINPipeline();
void registerGPUToHSACOPipeline();

} // namespace mlir

Expand Down
21 changes: 20 additions & 1 deletion lib/Conversion/LoopsToCUDA/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,26 @@ if(OEC_CUDA_BACKEND_ENABLED)
MLIRTargetNVVMIR
)
endif()
# Libraries that are only needed by the ROCM backend. Both variables are set
# only when the backend is enabled.
if(OEC_ROCM_BACKEND_ENABLED)
# LLVM AMDGPU components.
set(AMDGPU_LIBS
AMDGPUAsmParser
AMDGPUCodeGen
AMDGPUDesc
AMDGPUInfo
)
# lld libraries plus the MLIR ROCDL dialect and translation libraries.
set(ROCDLIR_LIB
lldCommon
lldDriver
lldELF
MLIRROCDLIR
MLIRTargetROCDLIR
)
endif()

add_mlir_dialect_library(GPUToKernelAndRuntimeCalls
ConvertLaunchFuncToCUDACalls.cpp
ConvertKernelFuncToCubin.cpp
ConvertKernelFuncToHsaco.cpp
StencilLoopMappingPass.cpp

ADDITIONAL_HEADER_DIRS
Expand All @@ -21,10 +37,13 @@ add_mlir_dialect_library(GPUToKernelAndRuntimeCalls
LINK_COMPONENTS
Core
MC
MCParser
${NVPTX_LIBS}

${AMDGPU_LIBS}

LINK_LIBS PUBLIC
${NVVMIR_LIB}
${ROCDLIR_LIB}

DEPENDS
MLIRLoopsToCUDAPassIncGen
Expand Down
10 changes: 4 additions & 6 deletions lib/Conversion/LoopsToCUDA/ConvertKernelFuncToCubin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,10 @@ void registerGPUToCUBINPipeline() {
kernelPm.addPass(createConvertGPUKernelToBlobPass(
translateModuleToNVVMIR, compilePtxToCubin, "nvptx64-nvidia-cuda",
"sm_35", "+ptx60", "nvvm.cubin"));
pm.addPass(createLowerToLLVMPass({
/* useBarePtrCallConv */ false,
/* emitCWrappers */ true,
/* indexBitwidth */ 32,
/* useAlignedAlloc */ false
}));
pm.addPass(createLowerToLLVMPass({/* useBarePtrCallConv */ false,
/* emitCWrappers */ true,
/* indexBitwidth */ 32,
/* useAlignedAlloc */ false}));
});
}
} // namespace mlir
Expand Down
212 changes: 212 additions & 0 deletions lib/Conversion/LoopsToCUDA/ConvertKernelFuncToHsaco.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
#include "Conversion/LoopsToCUDA/Passes.h"
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/Module.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassRegistry.h"
#include "mlir/Support/FileUtilities.h"
#include "mlir/Target/ROCDLIR.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include <utility>

#ifdef OEC_ROCM_BACKEND_ENABLED

// MC headers.
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"

// lld headers.
#include "lld/Common/Driver.h"

using namespace mlir;
using namespace llvm;

using Blob = SmallVector<char, 0>;
constexpr char tripleName[] = "amdgcn-amd-amdhsa";
constexpr char targetChip[] = "gfx1010";
constexpr char features[] = "-code-object-v3";

/// Assembles the AMDGPU assembly text in `isa` into an object file using the
/// LLVM MC layer and writes the resulting bytes into `result`.
///
/// The target is selected by the file-level `tripleName`, `targetChip` and
/// `features` constants. `name` is used only as the prefix of the diagnostics
/// printed to stderr.
///
/// Returns failure() when the target cannot be looked up, when an MC component
/// cannot be created, or when the assembly text fails to parse. `result` may
/// hold partial output in that case and must not be used.
static LogicalResult assembleIsa(const std::string &isa, StringRef name,
                                 Blob &result) {
  raw_svector_ostream os(result);

  std::string error;
  Triple theTriple(Triple::normalize(tripleName));
  const Target *theTarget =
      TargetRegistry::lookupTarget(theTriple.normalize(), error);
  if (!theTarget) {
    WithColor::error(errs(), name) << error << "\n";
    return failure();
  }

  // Shared diagnostic for every MC component that fails to be created.
  auto initializationError = [&]() {
    WithColor::error(errs(), name) << "assembler initialization error.\n";
    return failure();
  };

  SourceMgr srcMgr;
  srcMgr.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(isa), SMLoc());

  const MCTargetOptions mcOptions;
  std::unique_ptr<MCRegisterInfo> mri(theTarget->createMCRegInfo(tripleName));
  if (!mri)
    return initializationError();
  std::unique_ptr<MCAsmInfo> mai(
      theTarget->createMCAsmInfo(*mri, tripleName, mcOptions));
  if (!mai)
    return initializationError();
  mai->setRelaxELFRelocations(true);

  MCObjectFileInfo mofi;
  MCContext ctx(mai.get(), mri.get(), &mofi, &srcMgr, &mcOptions);
  mofi.InitMCObjectFileInfo(theTriple, false, ctx, false);

  // current_path returns an error code, so a false value means success.
  SmallString<128> cwd;
  if (!sys::fs::current_path(cwd))
    ctx.setCompilationDir(cwd);

  std::unique_ptr<MCStreamer> mcStreamer;
  std::unique_ptr<MCInstrInfo> mcii(theTarget->createMCInstrInfo());
  std::unique_ptr<MCSubtargetInfo> sti(
      theTarget->createMCSubtargetInfo(tripleName, targetChip, features));
  if (!mcii || !sti)
    return initializationError();

  // Hold the emitter and backend in owners right away so nothing leaks if one
  // of them could not be created.
  std::unique_ptr<MCCodeEmitter> ce(
      theTarget->createMCCodeEmitter(*mcii, *mri, ctx));
  std::unique_ptr<MCAsmBackend> mab(
      theTarget->createMCAsmBackend(*sti, *mri, mcOptions));
  if (!ce || !mab)
    return initializationError();

  // Create the object writer before ownership of the backend is handed over
  // to the streamer.
  std::unique_ptr<MCObjectWriter> objectWriter = mab->createObjectWriter(os);
  mcStreamer.reset(theTarget->createMCObjectStreamer(
      theTriple, ctx, std::move(mab), std::move(objectWriter), std::move(ce),
      *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible,
      /*DWARFMustBeAtTheEnd*/ false));
  if (!mcStreamer)
    return initializationError();
  mcStreamer->setUseAssemblerInfoForParsing(true);

  std::unique_ptr<MCAsmParser> parser(
      createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
  std::unique_ptr<MCTargetAsmParser> tap(
      theTarget->createMCAsmParser(*sti, *parser, *mcii, mcOptions));

  if (!tap)
    return initializationError();

  parser->setTargetParser(*tap);
  // Run() returns true when assembling failed; do not report success for an
  // object that could not be produced.
  if (parser->Run(/*NoInitialTextSection=*/false)) {
    WithColor::error(errs(), name) << "assembling ISA error.\n";
    return failure();
  }

  return success();
}

/// Links the object file bytes in `isaBlob` into an HSA code object by running
/// lld in-process (`ld.lld -shared`) and stores the linked bytes in
/// `hsacoBlob`.
///
/// Input and output both go through temporary files, which are removed again
/// when the function returns. `name` is used only as the prefix of the
/// diagnostics printed to stderr.
///
/// Returns failure() when a temporary file cannot be created or written, when
/// lld reports an error, or when the linked file cannot be read back.
static LogicalResult createHsaco(const Blob &isaBlob, StringRef name,
                                 Blob &hsacoBlob) {
  // Save the ISA binary to a temp file.
  int tempIsaBinaryFd = -1;
  SmallString<128> tempIsaBinaryFilename;
  std::error_code ec = sys::fs::createTemporaryFile(
      "kernel", "o", tempIsaBinaryFd, tempIsaBinaryFilename);
  if (ec) {
    WithColor::error(errs(), name)
        << "temporary file for ISA binary creation error.\n";
    return failure();
  }
  FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
  raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, /*shouldClose=*/true);
  tempIsaBinaryOs << isaBlob;
  tempIsaBinaryOs.close();
  // An unhandled stream error is fatal in the raw_fd_ostream destructor, so
  // report it here and clear it instead.
  if (tempIsaBinaryOs.has_error()) {
    tempIsaBinaryOs.clear_error();
    WithColor::error(errs(), name)
        << "writing ISA binary to temporary file error.\n";
    return failure();
  }

  // Create a temp file for HSA code object. Only the path is needed because
  // lld opens the output itself; the overload without a descriptor does not
  // leave an open file descriptor behind.
  SmallString<128> tempHsacoFilename;
  ec = sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFilename);
  if (ec) {
    WithColor::error(errs(), name)
        << "temporary file for HSA code object creation error.\n";
    return failure();
  }
  FileRemover cleanupHsaco(tempHsacoFilename);

  // Invoke lld. Expect a true return value from lld.
  bool ret = lld::elf::link({"ld.lld", "-shared", tempIsaBinaryFilename.c_str(),
                             "-o", tempHsacoFilename.c_str()},
                            /*canEarlyExit=*/false, llvm::outs(), llvm::errs());
  if (!ret) {
    WithColor::error(errs(), name) << "lld invocation error.\n";
    return failure();
  }

  // Load the HSA code object.
  auto hsacoFile = mlir::openInputFile(tempHsacoFilename);
  if (!hsacoFile) {
    WithColor::error(errs(), name)
        << "read HSA code object from temp file error.\n";
    return failure();
  }
  hsacoBlob.assign(hsacoFile->getBuffer().begin(),
                   hsacoFile->getBuffer().end());

  return success();
}

/// Translates the operation `m` into an LLVM IR module by forwarding to
/// translateModuleToROCDLIR and returns the result unchanged.
static std::unique_ptr<llvm::Module> compileModuleToROCDLIR(Operation *m) {
  // TODO(whchung): Link with ROCm-Device-Libs in case needed (ex: the Module
  // depends on math functions).
  return translateModuleToROCDLIR(m);
}

/// Compiles the AMDGPU assembly text in `isa` into an HSA code object.
///
/// The text is first assembled into an object file through the MC layer
/// (assembleIsa) and that object is then linked with lld (createHsaco).
/// `name` prefixes the diagnostics printed to stderr; `loc` is not used.
///
/// Returns the code object bytes, or an empty (null) blob on failure.
static OwnedBlob compileISAToHsaco(const std::string &isa, Location loc,
                                   StringRef name) {
  Blob isaBlob;
  Blob hsacoBlob;

  // ISA -> ISA in binary form via MC, then lld creates the HSA code object.
  // The link step is skipped when assembling already failed.
  const bool produced = succeeded(assembleIsa(isa, name, isaBlob)) &&
                        succeeded(createHsaco(isaBlob, name, hsacoBlob));
  if (!produced) {
    WithColor::error(errs(), name) << "producing HSA code object error.\n";
    return {};
  }

  return std::make_unique<std::vector<char>>(hsacoBlob.begin(),
                                             hsacoBlob.end());
}

namespace mlir {
/// Registers the "stencil-kernel-to-hsaco" pass pipeline, which outlines GPU
/// kernels, lowers every gpu.module to ROCDL, serializes it into a blob
/// annotated as "rocdl.hsaco", and finally lowers the remaining code to the
/// LLVM dialect.
void registerGPUToHSACOPipeline() {
  // Callback that populates `pm` with the passes of the pipeline.
  auto buildPipeline = [](OpPassManager &pm) {
    // Initialize LLVM AMDGPU backend.
    LLVMInitializeAMDGPUTarget();
    LLVMInitializeAMDGPUTargetInfo();
    LLVMInitializeAMDGPUTargetMC();
    LLVMInitializeAMDGPUAsmPrinter();
    LLVMInitializeAMDGPUAsmParser();

    pm.addPass(createGpuKernelOutliningPass());

    // Passes that run on each outlined gpu.module. The 32 passed to the ROCDL
    // lowering is presumably the index bitwidth (it matches the indexBitwidth
    // given to createLowerToLLVMPass below) -- TODO confirm.
    auto &kernelPm = pm.nest<gpu::GPUModuleOp>();
    kernelPm.addPass(createStripDebugInfoPass());
    kernelPm.addPass(createLowerGpuOpsToROCDLOpsPass(32));
    kernelPm.addPass(createConvertGPUKernelToBlobPass(
        compileModuleToROCDLIR, compileISAToHsaco, tripleName, targetChip,
        features, /*gpuBinaryAnnotation=*/"rocdl.hsaco"));

    pm.addPass(createLowerToLLVMPass({/* useBarePtrCallConv */ false,
                                      /* emitCWrappers */ true,
                                      /* indexBitwidth */ 32,
                                      /* useAlignedAlloc */ false}));
  };

  PassPipelineRegistration<>("stencil-kernel-to-hsaco",
                             "Lower kernels to hsaco", buildPipeline);
}
} // namespace mlir
#endif
3 changes: 3 additions & 0 deletions oec-opt/oec-opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ int main(int argc, char **argv) {
#ifdef OEC_CUDA_BACKEND_ENABLED
registerGPUToCUBINPipeline();
#endif
#ifdef OEC_ROCM_BACKEND_ENABLED
registerGPUToHSACOPipeline();
#endif

// Register the stencil passes
createShapeInferencePass();
Expand Down

0 comments on commit 3916f13

Please sign in to comment.