From 5e2f81d6a0f22f758bcbf669d0cf3883207ffa5a Mon Sep 17 00:00:00 2001 From: Maksim Levental Date: Fri, 30 Aug 2024 22:16:51 -0500 Subject: [PATCH] [WIP] use peano for kernels fixes https://github.com/nod-ai/iree-amd-aie/issues/637 --- build_tools/ci/run_matmul_test.sh | 26 +-- build_tools/download_peano.sh | 2 +- .../iree-amd-aie/Target/CMakeLists.txt | 13 +- .../iree-amd-aie/Target/PeanoDriver.cpp | 105 +++++++++++ .../AMD-AIE/iree-amd-aie/Target/PeanoDriver.h | 28 +++ .../AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 172 +++++++++--------- 6 files changed, 237 insertions(+), 109 deletions(-) mode change 100644 => 100755 build_tools/download_peano.sh create mode 100644 compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/PeanoDriver.cpp create mode 100644 compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/PeanoDriver.h diff --git a/build_tools/ci/run_matmul_test.sh b/build_tools/ci/run_matmul_test.sh index 5328eb8d3..a24f40126 100755 --- a/build_tools/ci/run_matmul_test.sh +++ b/build_tools/ci/run_matmul_test.sh @@ -182,7 +182,7 @@ function run_matmul_test() { local amd_aie_install_path="${IREE_INSTALL_DIR}" - local vitis_path="${VITIS}" + local vitis_path="" local use_chess="false" @@ -540,16 +540,15 @@ run_matmul_test \ # MLIR-AIR Matmul tests ################################################################### -if [ -d "$VITIS" ]; then - run_matmul_test \ - --name_prefix "ukern" \ - --lower_to_aie_pipeline "air" \ - --tile_pipeline "pad-pack" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --m "256" --k "256" --n "256" \ - --use_ukernel "1" -fi +run_matmul_test \ + --name_prefix "ukern" \ + --lower_to_aie_pipeline "air" \ + --tile_pipeline "pad-pack" \ + --lhs_rhs_type "bf16" \ + --acc_type "f32" \ + --m "256" --k "256" --n "256" \ + --vitis_path "${VITIS}" \ + --use_ukernel "1" # Example of a run with a group of 2+ matmuls. Currently this test is passed # the flag '--num_repeat_runs 0" as there is currently an issue with the runtime if @@ -720,6 +719,7 @@ if [ -d "$VITIS" ]; then --lhs_rhs_type "bf16" \ --acc_type "f32" \ --num_repeat_runs "2" \ + --vitis_path "${VITIS}" \ --use_ukernel "1" run_matmul_test_on_shapes ${bf16_ukernel_shapes_medium[@]} \ @@ -729,6 +729,7 @@ if [ -d "$VITIS" ]; then --lhs_rhs_type "bf16" \ --acc_type "f32" \ --num_repeat_runs "2" \ + --vitis_path "${VITIS}" \ --use_ukernel "1" fi @@ -746,6 +747,7 @@ if [ -d "$VITIS" ]; then --n "32" \ --k "32" \ --use_chess "1" \ + --vitis_path "${VITIS}" \ --num_repeat_runs "10" run_matmul_test \ @@ -757,6 +759,7 @@ if [ -d "$VITIS" ]; then --k "64" \ --use_chess "1" \ --num_repeat_runs "10" \ + --vitis_path "${VITIS}" \ --use_ukernel "1" run_matmul_test \ @@ -769,6 +772,7 @@ if [ -d "$VITIS" ]; then --n "32" \ --k "32" \ --use_chess "1" \ + --vitis_path "${VITIS}" \ --num_repeat_runs "10" fi diff --git a/build_tools/download_peano.sh b/build_tools/download_peano.sh old mode 100644 new mode 100755 index 70c8693f5..d5001a215 --- a/build_tools/download_peano.sh +++ b/build_tools/download_peano.sh @@ -1,5 +1,5 @@ #!/bin/bash -RELEASE=19.0.0.2024082221+90abe71b +RELEASE=19.0.0.2024083101+42158757 pip download llvm_aie==$RELEASE -f https://github.com/Xilinx/llvm-aie/releases/expanded_assets/nightly unzip llvm_aie*whl diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt index 3c7cd4d64..89971d7ef 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt @@ -10,10 +10,11 @@ iree_cc_library( NAME AIETargets SRCS - "AMDAIETargetBCF.cpp" - "AMDAIETargetCDODirect.cpp" - "AMDAIETargetLdScript.cpp" - "XCLBinGen.cpp" + AMDAIETargetBCF.cpp + AMDAIETargetCDODirect.cpp + AMDAIETargetLdScript.cpp + PeanoDriver.cpp + XCLBinGen.cpp DEPS iree-amd-aie::aie_runtime::iree_aie_runtime_static iree::target::amd-aie::Transforms @@ -28,9 +29,9 @@ iree_cc_library( NAME Target HDRS - "AIETarget.h" + AIETarget.h SRCS - "AIETarget.cpp" + AIETarget.cpp DEPS ::AIETargets iree-amd-aie::schemas::xrt_executable_def_c_fbs diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/PeanoDriver.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/PeanoDriver.cpp new file mode 100644 index 000000000..1db24a8d5 --- /dev/null +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/PeanoDriver.cpp @@ -0,0 +1,105 @@ +// Copyright 2024 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "PeanoDriver.h" + +#include +#include +#include + +#include "llvm/Support/Error.h" + +using Path = std::filesystem::path; + +void addExternCSystemInclude(std::vector &CC1Args, + const std::string &Path) { + CC1Args.push_back("-internal-externc-isystem"); + CC1Args.push_back(Path); +} + +void addSystemInclude(std::vector &CC1Args, + const std::string &Path) { + CC1Args.push_back("-internal-isystem"); + CC1Args.push_back(Path); +} + +void AddClangSystemIncludeArgs(std::vector &CC1Args, + const Path &peanoDir, const std::string &target, + bool novitisheaders, bool nostdlibinc) { + // Always include our instrinsics, for compatibility with existing toolchain. + if (!novitisheaders) { + std::string path; + if (target.rfind("aie2-", 0) == 0) { + path = peanoDir / "lib" / "clang" / "19" / "include" / "aiev2intrin.h"; + } else { + llvm::report_fatal_error(("unsupported target: " + target).c_str()); + } + CC1Args.push_back("-include"); + CC1Args.push_back(path); + } + + CC1Args.push_back("-D__AIENGINE__"); + if (target.rfind("aie2-", 0) == 0) { + CC1Args.push_back("-D__AIEARCH__=20"); + } else { + llvm::report_fatal_error(("unsupported target: " + target).c_str()); + } + + // Don't pull in system headers from /usr/include or /usr/local/include. + // All the basic headers that we need come from the compiler. + CC1Args.push_back("-nostdsysteminc"); + + if (nostdlibinc) return; + addExternCSystemInclude(CC1Args, peanoDir / "include" / target); +} + +void addLibCxxIncludePaths(std::vector &CC1Args, + const Path &peanoDir, const std::string &target, + bool nostdinc, bool nostdlibinc, bool nostdincxx) { + if (nostdinc || nostdlibinc || nostdincxx) return; + addSystemInclude(CC1Args, peanoDir / "include" / target / "c++" / "v1"); + // Second add the generic one. + addSystemInclude(CC1Args, peanoDir / "include" / "c++" / "v1"); +} + +void addOptTargetOptions(std::vector &CC1Args) { + // For now, we disable the auto-vectorizers by default, as the backend cannot + // handle many vector types. For experimentation the vectorizers can still be + // enabled explicitly by the user + CC1Args.push_back("-vectorize-loops=false"); + CC1Args.push_back("-vectorize-slp=false"); + // An if-then-else cascade requires at least 5 delay slots for evaluating the + // condition and 5 delay slots for one of the branches, thus speculating 10 + // instructions should be fine + CC1Args.push_back("--two-entry-phi-node-folding-threshold=10"); + // Make sure to perform most optimizations before mandatory inlinings, + // otherwise noalias attributes can get lost and hurt AA results. + CC1Args.push_back("-mandatory-inlining-before-opt=false"); + // Perform complete AA analysis on phi nodes. + CC1Args.push_back("-basic-aa-full-phi-analysis=true"); + // Extend the max limit of the search depth in BasicAA + CC1Args.push_back("-basic-aa-max-lookup-search-depth=10"); +} + +void addClangTargetOptions(std::vector &CC1Args, + const std::string &target) { + CC1Args.emplace_back("--target=" + target); + CC1Args.push_back("-fno-use-init-array"); + // Pass -fno-threadsafe-statics to prevent dependence on lock acquire/release + // handling for static local variables. + CC1Args.push_back("-fno-threadsafe-statics"); + std::vector peanoArgs; + addOptTargetOptions(peanoArgs); + CC1Args.reserve(CC1Args.size() + 2 * peanoArgs.size()); + for (const std::string &item : peanoArgs) { + CC1Args.emplace_back("-mllvm"); + CC1Args.emplace_back(item); + } +} + +// Avoid using newer dwarf versions, as the simulator doesn't understand newer +// dwarf. +unsigned getMaxDwarfVersion() { return 4; } diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/PeanoDriver.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/PeanoDriver.h new file mode 100644 index 000000000..2f5aaac5a --- /dev/null +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/PeanoDriver.h @@ -0,0 +1,28 @@ +// Copyright 2024 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include + +#include "llvm/Support/Error.h" + +void AddClangSystemIncludeArgs(std::vector &CC1Args, + const std::filesystem::path &peanoDir, + const std::string &target, + bool novitisheaders = false, + bool nostdlibinc = false); + +void addLibCxxIncludePaths(std::vector &CC1Args, + const std::filesystem::path &peanoDir, + const std::string &target, bool nostdinc = false, + bool nostdlibinc = false, bool nostdincxx = false); + +void addOptTargetOptions(std::vector &CC1Args); +void addClangTargetOptions(std::vector &CC1Args, + const std::string &target); + +unsigned getMaxDwarfVersion(); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index 5de16906f..de69689cd 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -11,17 +11,20 @@ #include #include #include + // ReSharper disable once CppUnusedIncludeDirective #include #include #include "AMDAIETargets.h" +#include "PeanoDriver.h" #include "aievec/Passes.h" #include "iree-amd-aie/Transforms/Passes.h" #include "iree/compiler/Utils/ToolUtils.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/JSON.h" #include "llvm/Support/MemoryBuffer.h" @@ -30,7 +33,6 @@ #include "llvm/Support/ToolOutputFile.h" #include "mlir/IR/AsmState.h" #include "mlir/IR/BuiltinOps.h" -#include "mlir/IR/MLIRContext.h" #include "mlir/Pass/PassManager.h" #include "mlir/Support/FileUtilities.h" #include "mlir/Target/LLVMIR/Export.h" @@ -88,15 +90,18 @@ static const std::string _MM_CC{ using namespace std::placeholders; using namespace llvm; using namespace mlir; -using namespace xilinx; using Path = std::filesystem::path; +using xilinx::AIE::DeviceOp; +using xilinx::AIE::getCoreOp; +using xilinx::AIE::TileOp; + namespace { FailureOr getTargetDir(const std::string &npuVersion) { if (npuVersion == "npu1") return std::string{"target_aie_ml"}; if (npuVersion == "npu4") return std::string{"target_aie2p"}; - llvm::errs() << "unsupported NPUVersion: " << npuVersion; + llvm::errs() << "unsupported NPUVersion: " << npuVersion << "\n"; return failure(); } @@ -152,7 +157,8 @@ FailureOr findVitis(std::optional &vitisDir, return failure(); } if (!std::filesystem::exists(licenseFile)) { - llvm::errs() << "ERROR: license file" << licenseFile << " does not exist"; + llvm::errs() << "ERROR: license file" << licenseFile << " does not exist" + << "\n"; return failure(); } } @@ -178,7 +184,7 @@ FailureOr findVitis(std::optional &vitisDir, return *vitisDir; } -static FailureOr findAMDAIETool(std::string toolName, +static FailureOr findAMDAIETool(const std::string &toolName, const Path &amdAIEInstallDir) { #if defined(_WIN32) toolName += ".exe"; @@ -215,7 +221,7 @@ std::pair> makeChessArgs( archVersion = "21"; modelDir = "aie2p"; } else { - llvm::errs() << "unsupported NPU version: " << npuVersion; + llvm::errs() << "unsupported NPU version: " << npuVersion << "\n"; llvm::report_fatal_error("unsupported NPU version"); } @@ -410,27 +416,6 @@ static LogicalResult assembleFileUsingChess( return runTool(xChessCCExe, args, verbose, env); } -std::vector makePeanoOptArgs() { - return { - // peano has no proper vectorization cost model for AIE - "-vectorize-loops=false", - // - "-vectorize-slp=false", - // An if-then-else cascade requires at least 5 delay slots for - // evaluating the condition and 5 delay slots for one of the - // branches, thus speculating 10 instructions should be fine - "--two-entry-phi-node-folding-threshold=10", - // Make sure to perform most optimizations before mandatory - // inlinings, otherwise noalias attributes can get lost and - // hurt AA results. - "-mandatory-inlining-before-opt=false", - // complete AA analysis on phi nodes. - "-basic-aa-full-phi-analysis=true", - // Extend the max limit of the search depth in BasicAA - "-basic-aa-max-lookup-search-depth=10", - }; -} - static LogicalResult assembleFileUsingPeano( const std::string &inputFile, const std::string &outputFile, const std::vector &extraArgs, Path &_tempDir, Path &peanoDir, @@ -439,21 +424,13 @@ static LogicalResult assembleFileUsingPeano( args.reserve(args.size() + std::distance(extraArgs.begin(), extraArgs.end())); args.insert(args.end(), extraArgs.begin(), extraArgs.end()); args.emplace_back("-O2"); + // TODO(max): pipe target arch in somehow - args.emplace_back("--target=aie2-none-unknown-elf"); - std::vector peanoArgs = makePeanoOptArgs(); - args.reserve(args.size() + peanoArgs.size()); - for (const std::string &item : peanoArgs) { - args.emplace_back("-mllvm"); - args.emplace_back(item); - } - args.emplace_back("-fno-use-init-array"); - // Pass -fno-threadsafe-statics to prevent dependence on lock acquire/release - // handling for static local variables. - args.emplace_back("-fno-threadsafe-statics"); - // Don't pull in system headers from /usr/include or /usr/local/include. - // All of the basic headers that we need come from the compiler. - args.emplace_back("-nostdsysteminc"); + std::string target = "aie2-none-unknown-elf"; + addClangTargetOptions(args, target); + AddClangSystemIncludeArgs(args, peanoDir, target); + addLibCxxIncludePaths(args, peanoDir, target); + args.emplace_back("-c"); args.emplace_back(inputFile); args.emplace_back("-o"); @@ -475,7 +452,7 @@ static FailureOr assembleStringUsing( if (auto maybeErr = dumpStrToDisk(inputFileStr, inputFile.string()); maybeErr.has_value()) { llvm::errs() << "Failed to dump to disk " << inputFile.string() - << " because: " << maybeErr; + << " because: " << maybeErr << "\n"; return failure(); } @@ -487,7 +464,8 @@ static FailureOr assembleStringUsing( } if (failed(assembler(inputFile.string(), outputFile.string(), extraArgs, workDir, toolDir, npuVersion, verbose))) { - llvm::errs() << "Failed to assemble " << outputFileName << ".o"; + llvm::errs() << "Failed to assemble " << outputFileName << ".o" + << "\n"; return failure(); } return outputFile; @@ -505,17 +483,17 @@ static_assert(std::is_same_v vitisDir, const std::string &targetArch, - bool verbose, Path peanoDir, const std::string &npuVersion, + DeviceOp deviceOp, const std::string &objFile, Path &tempDir, bool useChess, + std::optional vitisDir, const std::string &targetArch, bool verbose, + Path peanoDir, const std::string &npuVersion, const std::optional &ukernel) { - auto tileOps = deviceOp.getOps(); + auto tileOps = deviceOp.getOps(); std::string errorMessage; - for (AIE::TileOp tileOp : tileOps) { + for (TileOp tileOp : tileOps) { int col = tileOp.getCol(); int row = tileOp.getRow(); - auto coreOp = AIE::getCoreOp(tileOp); + auto coreOp = getCoreOp(tileOp); if (!coreOp) continue; std::string elfFileName; @@ -532,22 +510,31 @@ static LogicalResult generateCoreElfFiles( Path cwd = std::filesystem::current_path(); FailureOr mmObjectFilePath; if (ukernel && (ukernel == "mm" || ukernel == "all")) { - FailureOr maybeVitisDir = findVitis(vitisDir, npuVersion); - if (failed(maybeVitisDir)) { - llvm::errs() << "compiling ukernels currently requires chess (even if " - "you're using peano)"; - return failure(); - } if (!std::filesystem::exists(cwd / "mm.o")) { - mmObjectFilePath = assembleStringUsingChess( - /*inputFileStr=*/_MM_CC, - /*inputFileName=*/"mm.cc", - /*outputFileName=*/"mm.o", - /*outputDir=*/cwd, - /*extraArgs*/ std::vector{}, - /*workDir=*/tempDir, - /*vitisDir=*/*maybeVitisDir, - /*npuVersion*/ npuVersion, verbose); + if (useChess) { + if (verbose) llvm::outs() << "using chess for ukernel codegen\n"; + FailureOr maybeVitisDir = findVitis(vitisDir, npuVersion); + mmObjectFilePath = assembleStringUsingChess( + /*inputFileStr=*/_MM_CC, + /*inputFileName=*/"mm.cc", + /*outputFileName=*/"mm.o", + /*outputDir=*/cwd, + /*extraArgs*/ std::vector{}, + /*workDir=*/tempDir, + /*vitisDir=*/*maybeVitisDir, + /*npuVersion*/ npuVersion, verbose); + } else { + if (verbose) llvm::outs() << "using peano for ukernel codegen\n"; + mmObjectFilePath = assembleStringUsingPeano( + /*inputFileStr=*/_MM_CC, + /*inputFileName=*/"mm.cc", + /*outputFileName=*/"mm.o", + /*outputDir=*/cwd, + /*extraArgs*/ std::vector{}, + /*workDir=*/tempDir, + /*peanoDir=*/peanoDir, + /*npuVersion*/ npuVersion, verbose); + } if (failed(mmObjectFilePath)) return failure(); } else { mmObjectFilePath = cwd / "mm.o"; @@ -579,13 +566,14 @@ static LogicalResult generateCoreElfFiles( { auto bcfOutput = openOutputFile(bcfPath.string(), &errorMessage); if (!bcfOutput) { - llvm::errs() << "failed to open bcf file because: " << errorMessage; + llvm::errs() << "failed to open bcf file because: " << errorMessage + << "\n"; return failure(); } if (failed(mlir::iree_compiler::AMDAIE::AIETranslateToBCF( deviceOp, bcfOutput->os(), col, row))) { - llvm::errs() << "Failed to generate BCF"; + llvm::errs() << "Failed to generate BCF\n"; return failure(); } bcfOutput->keep(); @@ -614,7 +602,7 @@ static LogicalResult generateCoreElfFiles( openOutputFile(ldscriptPath.string(), &errorMessage); if (!ldscriptOutput) { llvm::errs() << "Failed to open ldscript file because: " - << errorMessage; + << errorMessage << "\n"; return failure(); } if (failed(mlir::iree_compiler::AMDAIE::AIETranslateToLdScript( @@ -637,8 +625,8 @@ static LogicalResult generateCoreElfFiles( flags.emplace_back("-o"); flags.emplace_back(elfFile.string()); if (verbose) flags.emplace_back("-v"); - // we run clang (ie cc) so that libc, libm, crt0/1 paths are injected - // automatically into the ld.lld invocation + // we run clang (ie cc) but really this is a linker call where libc, libm, + // crt0/1 paths are injected automatically into the ld.lld invocation if (failed( runTool((peanoDir / "bin" / "clang").string(), flags, verbose))) { return failure(); @@ -648,13 +636,13 @@ static LogicalResult generateCoreElfFiles( return success(); } -static LogicalResult generateCDO(MLIRContext *context, AIE::DeviceOp deviceOp, +static LogicalResult generateCDO(MLIRContext *context, DeviceOp deviceOp, const Path &tempDir) { auto copy = cast(deviceOp.getParentOp()->clone()); - deviceOp = *copy.getOps().begin(); + deviceOp = *copy.getOps().begin(); if (failed(mlir::iree_compiler::AMDAIE::AIETranslateToCDODirect( deviceOp, tempDir.string()))) { - llvm::errs() << "failed to emit CDO"; + llvm::errs() << "failed to emit CDO\n"; return failure(); } copy->erase(); @@ -750,7 +738,7 @@ static LogicalResult generateXCLBin( dumpStrToDisk(memTopologyData, memTopologyJsonFile.string()); maybeErr.has_value()) { llvm::errs() << "failed to dump to disk mem_topology.json because: " - << *maybeErr; + << *maybeErr << "\n"; return failure(); } } @@ -797,7 +785,7 @@ static LogicalResult generateXCLBin( dumpStrToDisk(aiePartitionJsonData, aiePartitionJsonFile.string()); maybeErr.has_value()) { llvm::errs() << "failed to dump to disk aie_partition.json because: " - << *maybeErr; + << *maybeErr << "\n"; return failure(); } } @@ -816,7 +804,7 @@ static LogicalResult generateXCLBin( if (auto maybeErr = dumpStrToDisk(kernelStr, kernelsJsonFile.string()); maybeErr.has_value()) { llvm::errs() << "failed to dump to disk kernels.json because: " - << *maybeErr; + << *maybeErr << "\n"; return failure(); } } @@ -825,7 +813,8 @@ static LogicalResult generateXCLBin( { auto designBifOut = openOutputFile(designBifFile.string(), &errorMessage); if (!designBifOut) { - llvm::errs() << "failed to open design.bif because: " << errorMessage; + llvm::errs() << "failed to open design.bif because: " << errorMessage + << "\n"; return failure(); } @@ -868,7 +857,7 @@ static LogicalResult generateXCLBin( } if (iree_aie_bootgen_main(cstrings.size(), const_cast(&cstrings[0]))) { - llvm::errs() << "failed to execute bootgen"; + llvm::errs() << "failed to execute bootgen\n"; return failure(); } } @@ -892,14 +881,14 @@ static LogicalResult generateXCLBin( "--force", "--input", *inputXclbin}; if (failed(runTool(xclbinutilBin.value().string(), inputFlags, verbose))) { - llvm::errs() << "failed to execute xclbinutil"; + llvm::errs() << "failed to execute xclbinutil\n"; return failure(); } auto aieInputPartitionOut = openInputFile(aieInputPartitionJsonFile.string(), &errorMessage); if (!aieInputPartitionOut) { llvm::errs() << "failed to open aie_input_partition.json because: " - << errorMessage; + << errorMessage << "\n"; return failure(); } Expected aieInputPartitionOutValue = @@ -913,7 +902,7 @@ static LogicalResult generateXCLBin( if (!aiePartitionOut) { llvm::errs() << "failed to open aie aie_input_partition.json for " "output because: " - << errorMessage; + << errorMessage << "\n"; return failure(); } llvm::Expected aiePartitionOutValue = @@ -931,7 +920,7 @@ static LogicalResult generateXCLBin( maybeErr.has_value()) { llvm::errs() << "failed to dump to disk aie_input_partition.json because: " - << errorMessage; + << errorMessage << "\n"; return failure(); } flags.insert(flags.end(), {"--input", *inputXclbin}); @@ -1011,7 +1000,7 @@ struct RemoveAlignment2FromLLVMLoadPass } // namespace static LogicalResult generateUnifiedObject( - MLIRContext *context, AIE::DeviceOp deviceOp, const std::string &outputFile, + MLIRContext *context, DeviceOp deviceOp, const std::string &outputFile, bool printIRBeforeAll, bool printIRAfterAll, bool printIRModuleScope, bool timing, bool useChess, bool verbose, Path &tempDir, std::optional vitisDir, const std::string &targetArch, Path &peanoDir, @@ -1021,7 +1010,7 @@ static LogicalResult generateUnifiedObject( ModuleOp moduleOpCopy = cast(deviceOp->getParentOp()).clone(); - PassManager pm(context, moduleOpCopy.getOperationName()); + PassManager pm(context, mlir::ModuleOp::getOperationName()); applyConfigToPassManager(pm, printIRBeforeAll, printIRAfterAll, printIRModuleScope, timing); @@ -1040,14 +1029,14 @@ static LogicalResult generateUnifiedObject( } if (failed(pm.run(moduleOpCopy))) { - llvm::errs() << "Failed to lower to LLVM"; + llvm::errs() << "Failed to lower to LLVM\n"; return failure(); } llvm::LLVMContext llvmContext; auto llvmModule = translateModuleToLLVMIR(moduleOpCopy, llvmContext); if (!llvmModule) { - llvm::errs() << "Failed to translate module to LLVMIR"; + llvm::errs() << "Failed to translate module to LLVMIR\n"; return failure(); } @@ -1081,7 +1070,7 @@ static LogicalResult generateUnifiedObject( if (auto maybeErr = dumpStrToDisk(inputLLStr, LLVMIRFile.string()); maybeErr.has_value()) { llvm::errs() << "Failed to dump to disk input.ll" - << " because: " << maybeErr; + << " because: " << maybeErr << "\n"; return failure(); } Path peanoOptBin = peanoDir / "bin" / "opt"; @@ -1092,11 +1081,12 @@ static LogicalResult generateUnifiedObject( "-O2", "--inline-threshold=10", "-S", LLVMIRFile.string(), // missing from libc "--disable-builtin=memset", "-o", OptLLVMIRFile.string()}; - std::vector peanoArgs = makePeanoOptArgs(); + std::vector peanoArgs; + addOptTargetOptions(peanoArgs); args.reserve(args.size() + peanoArgs.size()); args.insert(args.end(), peanoArgs.begin(), peanoArgs.end()); if (failed(runTool(peanoOptBin.string(), args, verbose))) { - llvm::errs() << "Failed to optimize ll with peano"; + llvm::errs() << "Failed to optimize ll with peano\n"; return failure(); } @@ -1115,7 +1105,7 @@ static LogicalResult generateUnifiedObject( return success(); } -FailureOr> getNpuInstructions(AIE::DeviceOp deviceOp) { +FailureOr> getNpuInstructions(DeviceOp deviceOp) { MLIRContext *ctx = deviceOp.getContext(); mlir::Attribute maybeNpuInstructions = deviceOp->getAttr("npu_instructions"); if (!maybeNpuInstructions) { @@ -1140,7 +1130,7 @@ FailureOr> getNpuInstructions(AIE::DeviceOp deviceOp) { } LogicalResult aie2xclbin( - MLIRContext *ctx, AIE::DeviceOp deviceOp, const std::string &outputNPU, + MLIRContext *ctx, DeviceOp deviceOp, const std::string &outputNPU, const std::string &outputXCLBin, bool printIRBeforeAll, bool printIRAfterAll, bool printIRModuleScope, bool timing, const std::string &tempDir, bool useChess, bool verbose, @@ -1153,7 +1143,7 @@ LogicalResult aie2xclbin( FailureOr> maybeNpuInstructions = getNpuInstructions(deviceOp); if (failed(maybeNpuInstructions)) { - assert(false && "Failed to get NPU instructions"); + llvm::errs() << "Failed to get NPU instructions"; return failure(); } ArrayRef npuInstructions = maybeNpuInstructions.value();