From acacc14a3d55b1053861ec95e4e0469a8f32b3d4 Mon Sep 17 00:00:00 2001 From: Maksim Levental Date: Fri, 23 Aug 2024 14:02:29 -0500 Subject: [PATCH] branch on driver datetime --- .github/workflows/ci-linux.yml | 6 +- build_tools/ci/cpu_comparison/run_test.py | 71 ++++++++++++------- .../AMD-AIE/iree-amd-aie/Target/AIETarget.cpp | 20 ++++++ .../AMD-AIE/iree-amd-aie/Target/AIETarget.h | 1 - .../AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 71 +++++++------------ .../AMD-AIE/iree-amd-aie/Target/XCLBinGen.h | 6 +- 6 files changed, 99 insertions(+), 76 deletions(-) diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml index 989b8fc7a..b14c26c91 100644 --- a/.github/workflows/ci-linux.yml +++ b/.github/workflows/ci-linux.yml @@ -91,10 +91,12 @@ jobs: test_linux: name: E2E Test linux - runs-on: nod-nuc-linux-2 needs: build_and_ctest strategy: - fail-fast: true + fail-fast: false + matrix: + runs-on: [linux-phoenix-20240606, linux-phoenix-20240819] + runs-on: ${{ matrix.runs-on }} env: XILINXD_LICENSE_FILE: /opt/xilinx/Xilinx.lic steps: diff --git a/build_tools/ci/cpu_comparison/run_test.py b/build_tools/ci/cpu_comparison/run_test.py index be6dd244a..0f11e6931 100755 --- a/build_tools/ci/cpu_comparison/run_test.py +++ b/build_tools/ci/cpu_comparison/run_test.py @@ -306,6 +306,15 @@ def __init__( if xrt_hash: self.xrt_hash = xrt_hash[0] + xdna_datetime = re.findall( + # eg 2.18.0_20240606 + r"amdxdna\s+:\s\d\.\d+\.\d+_(\d+)", + xrt_info, + flags=re.MULTILINE | re.IGNORECASE, + ) + if xdna_datetime: + self.xdna_datetime = int(xdna_datetime[0]) + # Try and get the peano commit hash. This is a bit of a hack, if it fails # peano_commit_has is left as "undetermined". self.peano_commit_hash = "undetermined" @@ -589,38 +598,45 @@ def run(self, config): test_files_dir = config.file_dir / "test_files" output_dir = config.output_dir - for name in [ - "two_matmul_switching", - "matmul_f32_8_8_4", - "matmul_f32_8_4_8", - ]: - aie_vs_llvm_cpu(config, test_files_dir / f"{name}.mlir") - - aie_vs_llvm_cpu( - config, - test_files_dir / "three_matmuls.mlir", - function_name="three_$mm$", - ) - - # Test(s) of the form matmul(A,B) where A:MxK, B:KxN - test_name = output_dir / "test_from_template.mlir" - template_name = matmul_template_dir / "matmul_MxK_KxN.mlir" - generate_matmul_test(test_name, template_name, 32, 32, 64, "bf16", "f32") - aie_vs_llvm_cpu(config, test_name) - - # Test(s) of the form matmul(A,B) + C where A:MxK, B:KxN, C:N - test_name = output_dir / "test_from_template_bias_N.mlir" - template_name = matmul_template_dir / "matmul_bias_MxK_KxN_N.mlir" - generate_matmul_test(test_name, template_name, 1024, 1024, 512, "bf16", "f32") - aie_vs_llvm_cpu(config, test_name, tile_pipeline="pack-peel", use_ukernel=True) - aie_vs_llvm_cpu(config, test_name, tile_pipeline="pack-peel", use_ukernel=False) - # Test(s) of the form matmul(A,B) + C where A:MxK, B:KxN, C:MxN test_name = output_dir / "test_from_template_full_bias.mlir" template_name = matmul_template_dir / "matmul_bias_MxK_KxN_MxN.mlir" generate_matmul_test(test_name, template_name, 128, 128, 256, "i32", "i32") aie_vs_llvm_cpu(config, test_name, tile_pipeline="pack-peel", rtol=0, atol=0) + if config.xdna_datetime and config.xdna_datetime < 20240819: + for name in [ + "two_matmul_switching", + "matmul_f32_8_8_4", + "matmul_f32_8_4_8", + ]: + aie_vs_llvm_cpu(config, test_files_dir / f"{name}.mlir") + + aie_vs_llvm_cpu( + config, + test_files_dir / "three_matmuls.mlir", + function_name="three_$mm$", + ) + + # Test(s) of the form matmul(A,B) where A:MxK, B:KxN + test_name = output_dir / "test_from_template.mlir" + template_name = matmul_template_dir / "matmul_MxK_KxN.mlir" + generate_matmul_test(test_name, template_name, 32, 32, 64, "bf16", "f32") + aie_vs_llvm_cpu(config, test_name) + + # Test(s) of the form matmul(A,B) + C where A:MxK, B:KxN, C:N + test_name = output_dir / "test_from_template_bias_N.mlir" + template_name = matmul_template_dir / "matmul_bias_MxK_KxN_N.mlir" + generate_matmul_test( + test_name, template_name, 1024, 1024, 512, "bf16", "f32" + ) + aie_vs_llvm_cpu( + config, test_name, tile_pipeline="pack-peel", use_ukernel=True + ) + aie_vs_llvm_cpu( + config, test_name, tile_pipeline="pack-peel", use_ukernel=False + ) + class SmokeSet(TestSet): def __init__(self): @@ -841,6 +857,8 @@ def all_tests( default="", ) + parser.add_argument("--driver-hash", type=str) + args = parser.parse_args() test_set_list = args.test_set.split(",") @@ -854,6 +872,7 @@ def all_tests( args.verbose, args.reset_npu_between_runs, args.do_not_run_aie, + args.drive_date, test_set_list, args.additional_aie_compilation_flags, ) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp index 515cc5be1..d552a9e64 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp @@ -363,6 +363,24 @@ LogicalResult AIETargetBackend::serializeExecutable( deviceOps[i] = cast(repl); } + // TODO(max): this should be an enum + // TODO(max): this needs to be pulled from PCIE + std::string npuVersion; + switch (clAMDAIETargetDevice) { + case AMDAIEDevice::npu1: + case AMDAIEDevice::npu1_1col: + case AMDAIEDevice::npu1_2col: + case AMDAIEDevice::npu1_3col: + case AMDAIEDevice::npu1_4col: + npuVersion = "npu1"; + break; + case AMDAIEDevice::npu4: + npuVersion = "npu4"; + break; + default: + llvm::report_fatal_error("unhandled NPU partitioning.\n"); + } + if (failed(aie2xclbin( /*ctx=*/variantOp->getContext(), deviceOps[i], /*outputNPU=*/npuInstPath.str().str(), @@ -377,7 +395,9 @@ LogicalResult AIETargetBackend::serializeExecutable( /*vitisDir=*/options.vitisInstallDir.empty() ? std::nullopt : std::optional{options.vitisInstallDir}, + // TODO(max): not right for strix /*targetArch=*/"AIE2", + /*npuVersion=*/npuVersion, /*peanoDir=*/options.peanoInstallDir, /*xclBinKernelID=*/ordinalHex.str(), /*xclBinKernelName=*/entryPointNamesFb[ordinal], diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.h index cb5ccbae8..1e5691c87 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.h @@ -43,7 +43,6 @@ struct AMDAIEOptions { // Print MLIR timing summary for the MLIR passes in aie2xclbin. bool aie2xclbinTiming{false}; - public: void bindOptions(OptionsBinder &binder) { static llvm::cl::OptionCategory category("AMD AIE Options"); binder.opt( diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index 31af9f905..847bf0f33 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -92,25 +92,7 @@ using Path = std::filesystem::path; namespace { -FailureOr getNPUVersion() { - // std::string errorMessage; - // auto maybeVbnv = - // openInputFile("/sys/bus/pci/devices/0000:c5:00.1/vbnv", &errorMessage); - // if (!maybeVbnv) { - // llvm::errs() << "couldn't read pci info for npu because: " << errorMessage; - // return failure(); - // } - // - // std::string vbnv = std::string{maybeVbnv->getBuffer()}; - // std::regex rgx("RyzenAI-(.*)"); - // std::smatch matches; - // if (std::regex_search(vbnv, matches, rgx)) return {matches[1]}; - // llvm::errs() << "couldn't find npu version in " << vbnv; - return std::string{"npu1"}; -} - -FailureOr getTargetDir() { - std::string npuVersion = *getNPUVersion(); +FailureOr getTargetDir(const std::string &npuVersion) { if (npuVersion == "npu1") return std::string{"target_aie_ml"}; if (npuVersion == "npu4") return std::string{"target_aie2p"}; llvm::errs() << "unsupported NPUVersion: " << npuVersion; @@ -216,10 +198,9 @@ static FailureOr findAMDAIETool(std::string toolName, return failure(); } -std::pair> makeChessArgs(Path &vitisDir, - Path &tempDir, - bool verbose) { - std::string npuVersion = *getNPUVersion(); +std::pair> makeChessArgs( + Path &vitisDir, Path &tempDir, const std::string &npuVersion, + bool verbose) { std::string archVersion; std::string modelDir; if (npuVersion == "npu1") { @@ -259,10 +240,11 @@ std::pair> makeChessArgs(Path &vitisDir, flags}; } -std::vector makeChessEnv(Path &vitisDir) { +std::vector makeChessEnv(Path &vitisDir, + const std::string &npuVersion) { Path aieToolsPath = vitisDir / "aietools"; - Path chessccPath = - aieToolsPath / "tps" / "lnx64" / *getTargetDir() / "bin" / "LNa64bin"; + Path chessccPath = aieToolsPath / "tps" / "lnx64" / + *getTargetDir(npuVersion) / "bin" / "LNa64bin"; Path path(::getenv("PATH")); Path lnx64o = aieToolsPath / "lib" / "lnx64.o"; Path dotLib = aieToolsPath / "lnx64" / "tools" / "dot" / "lib"; @@ -385,15 +367,16 @@ static std::optional runTool( static LogicalResult assembleFileUsingChess( const std::string &inputFile, const std::string &outputFile, const std::vector &extraArgs, Path &tempDir, Path &vitisDir, - bool verbose) { - auto [xChessCCExe, args] = makeChessArgs(vitisDir, tempDir, verbose); + const std::string &npuVersion, bool verbose) { + auto [xChessCCExe, args] = + makeChessArgs(vitisDir, tempDir, npuVersion, verbose); args.reserve(args.size() + std::distance(extraArgs.begin(), extraArgs.end())); args.insert(args.end(), extraArgs.begin(), extraArgs.end()); args.emplace_back("-c"); args.emplace_back(inputFile); args.emplace_back("-o"); args.emplace_back(outputFile); - std::vector env = makeChessEnv(vitisDir); + std::vector env = makeChessEnv(vitisDir, npuVersion); if (!runTool(xChessCCExe, args, verbose, env)) { llvm::errs() << "Failed to assemble " << inputFile << " with chess"; return failure(); @@ -426,7 +409,7 @@ std::vector makePeanoOptArgs() { static LogicalResult assembleFileUsingPeano( const std::string &inputFile, const std::string &outputFile, const std::vector &extraArgs, Path &_tempDir, Path &peanoDir, - bool verbose) { + const std::string &_npuVersion, bool verbose) { std::vector args; args.reserve(args.size() + std::distance(extraArgs.begin(), extraArgs.end())); args.insert(args.end(), extraArgs.begin(), extraArgs.end()); @@ -466,7 +449,7 @@ static FailureOr assembleStringUsing( const FileAssemblerT &assembler, const std::string &inputFileStr, const std::string &inputFileName, const std::string &outputFileName, Path &outputDir, const std::vector &extraArgs, Path &workDir, - Path &toolDir, bool verbose = false) { + Path &toolDir, const std::string &npuVersion, bool verbose = false) { Path inputFile = workDir / inputFileName; if (auto maybeErr = dumpStrToDisk(inputFileStr, inputFile.string()); maybeErr.has_value()) { @@ -482,7 +465,7 @@ static FailureOr assembleStringUsing( outputFile = outputFileName; } if (failed(assembler(inputFile.string(), outputFile.string(), extraArgs, - workDir, toolDir, verbose))) { + workDir, toolDir, npuVersion, verbose))) { llvm::errs() << "Failed to assemble " << outputFileName << ".o"; return failure(); } @@ -503,7 +486,8 @@ static_assert(std::is_same_v vitisDir, const std::string &targetArch, - bool verbose, Path peanoDir, const std::optional &ukernel) { + bool verbose, Path peanoDir, const std::string &npuVersion, + const std::optional &ukernel) { auto tileOps = deviceOp.getOps(); std::string errorMessage; @@ -585,7 +569,7 @@ static LogicalResult generateCoreElfFiles( } auto [xChessCCExe, chessArgs] = - makeChessArgs(*vitisDir, tempDir, verbose); + makeChessArgs(*vitisDir, tempDir, npuVersion, verbose); chessArgs.emplace_back(objFile); chessArgs.emplace_back(chessIntrinsicsObjFile->string()); if (ukernel && (ukernel == "mm" || ukernel == "all")) { @@ -595,7 +579,7 @@ static LogicalResult generateCoreElfFiles( chessArgs.emplace_back(bcfPath.string()); chessArgs.emplace_back("-o"); chessArgs.emplace_back(elfFile.string()); - std::vector env = makeChessEnv(*vitisDir); + std::vector env = makeChessEnv(*vitisDir, npuVersion); if (!runTool(xChessCCExe, chessArgs, verbose, env)) { llvm::errs() << "Failed to link with xbridge"; return failure(); @@ -647,7 +631,6 @@ static LogicalResult generateCDO(MLIRContext *context, AIE::DeviceOp deviceOp, bool printIRBeforeAll, bool printIRAfterAll, bool printIRModuleScope, bool timing, const Path &tempDir) { - auto copy = cast(deviceOp.getParentOp()->clone()); deviceOp = *copy.getOps().begin(); @@ -1033,8 +1016,8 @@ static LogicalResult generateUnifiedObject( MLIRContext *context, AIE::DeviceOp deviceOp, const std::string &outputFile, bool printIRBeforeAll, bool printIRAfterAll, bool printIRModuleScope, bool timing, bool useChess, bool verbose, Path tempDir, - std::optional vitisDir, const std::string &targetArch, - Path peanoDir) { + std::optional vitisDir, const std::string &targetArch, Path peanoDir, + const std::string &npuVersion) { assert(deviceOp->getParentOp() && isa(deviceOp->getParentOp()) && "DeviceOp must be in a module parent"); @@ -1086,7 +1069,7 @@ static LogicalResult generateUnifiedObject( /*outputDir=*/tempDir, /*extraArgs*/ std::vector{}, /*workDir=*/tempDir, - /*vitisDir=*/*maybeVitisDir, + /*vitisDir=*/*maybeVitisDir, /*npuVersion*/ npuVersion, /*verbose=*/verbose); if (failed(chessIntrinsicsObjFile)) { return failure(); @@ -1160,9 +1143,9 @@ LogicalResult aie2xclbin( bool printIRAfterAll, bool printIRModuleScope, bool timing, const std::string &tempDir, bool useChess, bool verbose, const std::optional &vitisDir, const std::string &targetArch, - const std::string &peanoDir, const std::string &xclBinKernelID, - const std::string &xclBinKernelName, const std::string &xclBinInstanceName, - const std::string &amdAIEInstallDir, + const std::string &npuVersion, const std::string &peanoDir, + const std::string &xclBinKernelID, const std::string &xclBinKernelName, + const std::string &xclBinInstanceName, const std::string &amdAIEInstallDir, const std::optional &InputXCLBin, const std::optional &ukernel) { PassManager pm(ctx, AIE::DeviceOp::getOperationName()); @@ -1193,12 +1176,12 @@ LogicalResult aie2xclbin( if (failed(generateUnifiedObject( ctx, deviceOp, unifiedObj.string(), printIRBeforeAll, printIRAfterAll, printIRModuleScope, timing, useChess, verbose, tempDir, vitisDir, - targetArch, peanoDir))) + targetArch, peanoDir, npuVersion))) return deviceOp.emitOpError("Failed to generate unified object"); if (failed(generateCoreElfFiles(deviceOp, unifiedObj.string(), tempDir, useChess, vitisDir, targetArch, verbose, - peanoDir, ukernel))) + peanoDir, npuVersion, ukernel))) return deviceOp.emitOpError("Failed to generate core ELF file(s)"); if (failed(generateCDO(ctx, deviceOp, printIRBeforeAll, printIRAfterAll, diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.h index 290064170..0e20d0c42 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.h @@ -17,8 +17,8 @@ mlir::LogicalResult aie2xclbin( bool printIRAfterAll, bool printIRModuleScope, bool timing, const std::string &tempDir, bool useChess, bool verbose, const std::optional &vitisDir, const std::string &targetArch, - const std::string &peanoDir, const std::string &xclBinKernelID, - const std::string &xclBinKernelName, const std::string &xclBinInstanceName, - const std::string &amdAIEInstallDir, + const std::string &npuVersion, const std::string &peanoDir, + const std::string &xclBinKernelID, const std::string &xclBinKernelName, + const std::string &xclBinInstanceName, const std::string &amdAIEInstallDir, const std::optional &InputXCLBin, const std::optional &ukernel);