From ed32041befad40842944f62e0475d8ceb153c5f9 Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Mon, 19 Aug 2024 17:15:38 -0500
Subject: [PATCH 01/28] [WIP] windows e2e

---
 build_tools/ci/cpu_comparison/run_test.py | 5 +++++
 build_tools/download_peano.sh             | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/build_tools/ci/cpu_comparison/run_test.py b/build_tools/ci/cpu_comparison/run_test.py
index 1cc3b53fa..e868fa221 100755
--- a/build_tools/ci/cpu_comparison/run_test.py
+++ b/build_tools/ci/cpu_comparison/run_test.py
@@ -4,6 +4,7 @@
 
 import argparse
 import os
+import platform
 import re
 import subprocess
 import time
@@ -46,6 +47,10 @@ def find_executable(install_dir: Path, executable_name):
         install_dir / "bin",
         install_dir / "tools",
     ]
+
+    if platform.system() == "Windows":
+        executable_name += ".exe"
+
     for directory in search_dirs:
         executable_path = directory / executable_name
         if executable_path.is_file():
diff --git a/build_tools/download_peano.sh b/build_tools/download_peano.sh
index 02b14a28e..a8c41957b 100644
--- a/build_tools/download_peano.sh
+++ b/build_tools/download_peano.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
 
-RELEASE=19.0.0.2024072901+debfcac7
+RELEASE=19.0.0.2024081918+69415c19
 pip download -q llvm_aie==$RELEASE -f https://github.com/Xilinx/llvm-aie/releases/expanded_assets/nightly
 unzip -q llvm_aie*whl

From ce8be1461cd5bdf2bd649e5817ba0aefbf16d32d Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Tue, 20 Aug 2024 04:32:09 -0500
Subject: [PATCH 02/28] 50% of the way there

---
 .github/workflows/ci-linux.yml                |   3 +-
 build_tools/ci/cpu_comparison/run_test.py     |  82 ++---
 build_tools/download_peano.sh                 |   4 +-
 cmake/iree_aie_xrt.cmake                      |  40 ++-
 .../iree-amd-aie/Target/CMakeLists.txt        |   2 +-
 .../AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 327 ++++++++++--------
 .../aie_runtime/iree_aie_configure.cc         |  10 +
 .../aie_runtime/iree_aie_runtime.h            |   4 +-
 8 files changed, 261 insertions(+), 211 deletions(-)

diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml
index b14c26c91..4b4ca2a53 100644
--- a/.github/workflows/ci-linux.yml
+++ b/.github/workflows/ci-linux.yml
@@ -199,7 +199,8 @@ jobs:
             iree-install \
             $PWD/llvm-aie \
             /opt/xilinx/xrt \
-            /opt/Xilinx/Vitis/2024.2
+            /opt/Xilinx/Vitis/2024.2 \
+            --reset-npu-between-runs
 
       - name: Printing IR from aie2xclbin
         run: |
diff --git a/build_tools/ci/cpu_comparison/run_test.py b/build_tools/ci/cpu_comparison/run_test.py
index e868fa221..ff8faa090 100755
--- a/build_tools/ci/cpu_comparison/run_test.py
+++ b/build_tools/ci/cpu_comparison/run_test.py
@@ -261,10 +261,16 @@ def __init__(
         self.xrt_hash = "undetermined"
         self.xrt_release = "undetermined"
         self.peano_commit_hash = "undetermined"
-        xrt_bin_dir = xrt_dir / "bin"
-        xrt_smi_exe = xrt_bin_dir / "xrt-smi"
+        xrt_bin_dir = xrt_dir
+        if platform.system() != "Windows":
+            xrt_bin_dir /= "bin"
+        # Compute the suffix separately: `"x" + ".exe" if c else ""` parses as
+        # `("x" + ".exe") if c else ""`, which yields an empty name off Windows.
+        exe_suffix = ".exe" if platform.system() == "Windows" else ""
+        xrt_smi_exe = xrt_bin_dir / ("xrt-smi" + exe_suffix)
         if not xrt_smi_exe.exists():
-            xrt_smi_exe = xrt_bin_dir / "xbutil"
+            # Fall back to xbutil when xrt-smi is not present.
+            xrt_smi_exe = xrt_bin_dir / ("xbutil" + exe_suffix)
         if not xrt_smi_exe.exists():
             raise RuntimeError(f"Neither xrt-smi nor xbutil found in {xrt_bin_dir}")
 
@@ -681,7 +687,7 @@ def run(self, config):
         )
 
 
-def getTestPartition():
+def get_test_partition():
     return [ConvolutionSet(), MatmulSet(), SmokeSet()]
 
 
@@ -746,9 +752,10 @@ def all_tests(
     verify_determinism()
 
     # Verify a very basic script runs before running the more complex tests
-    shell_out(["pwd"], verbose=config.verbose)
+    if platform.system() != "Windows":
+        shell_out(["pwd"], verbose=config.verbose)
 
-    partition = getTestPartition()
+    partition = get_test_partition()
     partition_names = [p.name for p in partition]
     map_to_partition = {p.name: p for p in partition}
     if "All" in test_set:
@@ -789,54 +796,48 @@ def all_tests(
     parser.add_argument("iree_install_dir", type=abs_path)
     parser.add_argument("peano_install_dir", type=abs_path)
     parser.add_argument("xrt_dir", type=abs_path)
-    parser.add_argument("vitis_dir", type=abs_path)
+    parser.add_argument("--vitis-dir", type=abs_path)
 
     # TODO(newling) make bool options boolean, not integer (tried but had issues)
     parser.add_argument(
-        "--return_on_fail",
+        "--return-on-fail",
         nargs="?",
         default=1,
         type=int,
-        help=(
-            "If 0, then the script will continue running even if a test fails, "
-            "enumerating all failures. Otherwise the script will exit on the first failure."
+        help=dedent(
+            """
+            If 0, then the script will continue running even if a test fails,
+            enumerating all failures. Otherwise the script will exit on the first failure.
+            """
         ),
     )
 
-    parser.add_argument(
-        "--verbose",
-        nargs="?",
-        default=1,
-        type=int,
-        help="If 0, then print statements are suppressed, otherwise they are printed.",
-    )
+    parser.add_argument("--verbose", action="store_true")
 
     parser.add_argument(
-        "--reset_npu_between_runs",
-        nargs="?",
-        default=1,
-        type=int,
+        "--reset-npu-between-runs",
+        action="store_true",
         help=(
-            "If 0 then the NPU is not reset between runs, otherwise it is reset. "
+            "If passed then the NPU is reset between runs, otherwise it is not reset. "
             "Resetting between runs can in theory help avoid certain types of "
             "errors in parts of the stack which these tests are not designed to catch."
         ),
     )
 
     parser.add_argument(
-        "--do_not_run_aie",
-        nargs="?",
-        default=0,
-        type=int,
-        help=(
-            "If 1, then the AIE backend will not be run. This is useful for "
-            "ensuring that everything up to the AIE run and numerical comparison "
-            "is working correctly, for example if you are not on a device with "
-            "working AIE HW and runtime."
+        "--do-not-run-aie",
+        action="store_true",
+        help=dedent(
+            """
+            If passed, then the AIE backend will not be run. This is useful for
+            ensuring that everything up to the AIE run and numerical comparison
+            is working correctly, for example if you are not on a device with
+            working AIE HW and runtime.
+            """
         ),
     )
 
-    partition = getTestPartition()
+    partition = get_test_partition()
     partition_names = [p.name for p in partition]
     partition_names_and_all = partition_names + ["All"]
     help_string = (
@@ -845,19 +846,22 @@ def all_tests(
     )
 
     parser.add_argument(
-        "--test_set",
+        "--test-set",
         type=str,
         help=help_string,
         default="All",
     )
 
     parser.add_argument(
-        "--additional_aie_compilation_flags",
+        "--additional-aie-compilation-flags",
         type=str,
-        help=(
-            "Additional flags to pass to the AIE compiler, for all tests. "
-            "Example, do print the IR between passes during compilation you might have: "
-            ' --additional_aie_compilation_flags="--mlir-print-ir-before-all --mlir-print-ir-module-scope --aie2xclbin-print-ir-before-all --aie2xclbin-print-ir-module-scope"'
+        help=dedent(
+            """
+            Additional flags to pass to the AIE compiler, for all tests.
+            Example, to print the IR between passes during compilation you might have:
+            --additional-aie-compilation-flags="--mlir-print-ir-before-all --mlir-print-ir-module-scope
+            --aie2xclbin-print-ir-before-all --aie2xclbin-print-ir-module-scope"
+            """
         ),
         default="",
     )
diff --git a/build_tools/download_peano.sh b/build_tools/download_peano.sh
index a8c41957b..8c20a7560 100644
--- a/build_tools/download_peano.sh
+++ b/build_tools/download_peano.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
 
 RELEASE=19.0.0.2024081918+69415c19
-pip download -q llvm_aie==$RELEASE -f https://github.com/Xilinx/llvm-aie/releases/expanded_assets/nightly
-unzip -q llvm_aie*whl
+pip download llvm_aie==$RELEASE -f https://github.com/Xilinx/llvm-aie/releases/expanded_assets/nightly
+unzip llvm_aie*whl
diff --git a/cmake/iree_aie_xrt.cmake b/cmake/iree_aie_xrt.cmake
index 0811fbda4..5879e21cd 100644
--- a/cmake/iree_aie_xrt.cmake
+++ b/cmake/iree_aie_xrt.cmake
@@ -4,7 +4,7 @@
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-if(TARGET iree-aie-xclbinutil)
+if(TARGET iree_aie_xrt_iree-aie-xclbinutil)
   return()
 endif()
 
@@ -134,32 +134,38 @@ list(REMOVE_ITEM _xclbinutil_srcs "${_xclbinutil_source_dir}/SectionSmartNic.cxx
 # and then --add-replace-section:MEM_TOPOLOGY won't work...
 # XRT/src/runtime_src/tools/xclbinutil/SectionMemTopology.cxx#L26-L41
 # TODO(max): and for whatever reason -WL,--whole-archive doesn't work
-add_executable(iree-aie-xclbinutil ${_xclbinutil_srcs})
+set(IREE_PACKAGE_ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}")
+set(IREE_PACKAGE_ROOT_PREFIX "iree::aie::xrt")
+iree_cc_binary(
+  NAME
+    # if you rename this be sure to update the if(...) return up top
+    # otherwise this script will be entered twice and you'll get a confusing error like
+    # "can't do add_executable; target already exists"
+    iree-aie-xclbinutil
+  SRCS
+    ${_xclbinutil_srcs}
+  COPTS
+    $<$<PLATFORM_ID:Linux>:-fexceptions -frtti>
+    $<$<PLATFORM_ID:Windows>:/EHsc /GR>
+  DEFINES
+    BOOST_BIND_GLOBAL_PLACEHOLDERS
+  INSTALL_COMPONENT
+    IREETools-Runtime
+  PUBLIC
+)
 
-target_compile_definitions(iree-aie-xclbinutil
-                           PRIVATE
-                           -DBOOST_BIND_GLOBAL_PLACEHOLDERS)
 set(THREADS_PREFER_PTHREAD_FLAG ON)
-target_link_libraries(iree-aie-xclbinutil
+target_link_libraries(iree_aie_xrt_iree-aie-xclbinutil
                       PRIVATE
                       Threads::Threads
                       $<BUILD_LOCAL_INTERFACE:${IREE_AIE_BOOST_LIBS}>
                       $<$<PLATFORM_ID:Linux>:$<BUILD_LOCAL_INTERFACE:transformcdo>>)
-target_include_directories(iree-aie-xclbinutil
+target_include_directories(iree_aie_xrt_iree-aie-xclbinutil
                            PRIVATE ${XRT_BINARY_DIR}/gen
                                    ${IREE_XRT_SOURCE_DIR}/runtime_src/core/include
                                    ${_xclbinutil_source_dir})
-target_compile_options(iree-aie-xclbinutil
-                       PRIVATE
-                       $<$<PLATFORM_ID:Linux>:-fexceptions -frtti>
-                       $<$<PLATFORM_ID:Windows>:/EHsc /GR>)
-set_target_properties(iree-aie-xclbinutil
+set_target_properties(iree_aie_xrt_iree-aie-xclbinutil
                       PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/tools")
-iree_install_targets(
-  TARGETS iree-aie-xclbinutil
-  COMPONENT IREETools-Runtime
-  EXPORT_SET Runtime
-)
 
 # ##############################################################################
 # xrt_coreutil
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt
index 3c7cd4d64..1d9e94123 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt
@@ -21,7 +21,7 @@ iree_cc_library(
 )
 
 if(IREE_AMD_AIE_ENABLE_XRT_DRIVER)
-  add_dependencies(iree_target_amd-aie_Target_AIETargets iree-aie-xclbinutil)
+  add_dependencies(iree_target_amd-aie_Target_AIETargets iree_aie_xrt_iree-aie-xclbinutil)
 endif()
 
 iree_cc_library(
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
index 21afa7b97..17f2e24b8 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
@@ -7,7 +7,6 @@
 #include "XCLBinGen.h"
 
 #include <filesystem>
-#include <fstream>
 #include <functional>
 #include <random>
 #include <regex>
@@ -179,7 +178,10 @@ FailureOr<Path> findVitis(std::optional<Path> &vitisDir,
 
 static FailureOr<Path> findAMDAIETool(std::string toolName,
                                       const Path &amdAIEInstallDir) {
-  Path toolBinExe = "";
+#if defined(_WIN32)
+  toolName += ".exe";
+#endif  // _WIN32
+  Path toolBinExe;
   if (!amdAIEInstallDir.empty()) {
     toolBinExe = amdAIEInstallDir / toolName;
     if (std::filesystem::exists(toolBinExe)) return toolBinExe;
@@ -195,7 +197,7 @@ static FailureOr<Path> findAMDAIETool(std::string toolName,
   if (std::filesystem::exists(toolBinExe)) return toolBinExe;
 
   llvm::errs() << "Could not find " << toolName
-               << ". Check your --iree-amd-aie-install-dir flag";
+               << ". Check your --iree-amd-aie-install-dir flag\n";
   return failure();
 }
 
@@ -276,12 +278,30 @@ std::optional<std::string> dumpStrToDisk(const std::string &payload,
   return {};
 }
 
+bool hasEnding(std::string const &fullString, std::string const &ending) {
+  if (fullString.length() >= ending.length()) {
+    return fullString.compare(fullString.length() - ending.length(),
+                              ending.length(), ending) == 0;
+  }
+  return false;
+}
+
 // Returns either:
 //  -- the output of running the tool, if run without failure, or
 //  -- an empty optional, if the tool fails to run.
-static std::optional<std::string> runTool(
-    const std::string &program, const std::vector<std::string> &args,
+LogicalResult runTool(
+    const std::string &program_, const std::vector<std::string> &args,
     bool verbose, std::optional<std::vector<std::string>> env = std::nullopt) {
+  // `program_` is the tool path as given by the caller; `program` is the
+  // path actually executed. On Windows executables carry an ".exe" suffix,
+  // so append it unless the caller already did (findAMDAIETool adds it).
+  // Elsewhere the path is used unchanged.
+  std::string program = program_;
+#if defined(_WIN32)
+  if (!hasEnding(program, ".exe")) {
+    program += ".exe";
+  }
+#endif  // _WIN32
   if (verbose) {
     llvm::outs() << "\nRun: ";
     if (env)
@@ -294,14 +314,11 @@ static std::optional<std::string> runTool(
   // Check that 'program' is a valid path, if not, fail immediately.
   if (!std::filesystem::exists(program)) {
     llvm::errs() << "Program " << program << " does not exist\n";
-    return {};
+    return failure();
   }
 
   // Run the program, piping any output to a temporary file (we only want to
   // print to terminal if verbose is true).
-  std::string errMsg;
-  sys::ProcessStatistics stats;
-  std::optional<sys::ProcessStatistics> optStats(stats);
   SmallVector<StringRef, 8> pArgs = {program};
   pArgs.append(args.begin(), args.end());
   SmallVector<char> temporaryPath;
@@ -313,20 +330,34 @@ static std::optional<std::string> runTool(
     if (errorCode) {
       llvm::errs() << "Failed to create temporary file: " << errorCode.message()
                    << "\n";
-      return {};
+      return failure();
     }
   }
 
   std::string temporaryPathStr =
       std::string(temporaryPath.begin(), temporaryPath.size());
   StringRef temporaryPathRef(temporaryPathStr);
-  auto tp = std::optional<StringRef>(temporaryPathRef);
   llvm::SmallVector<llvm::StringRef> envSmallVec;
   if (env) envSmallVec.append(env->begin(), env->end());
-  int result = sys::ExecuteAndWait(program, pArgs, envSmallVec,
-                                   /* redirects */ {tp, tp, tp}, 0, 0, &errMsg,
-                                   nullptr, &optStats);
 
+  SmallVector<std::optional<StringRef>> redirects;
+#ifdef _WIN32
+  redirects = {{}, {}, {}};
+#else
+  auto tp = std::optional<StringRef>(temporaryPathRef);
+  redirects = {tp, tp, tp};
+#endif
+
+  bool executionFailed;
+  std::string errMsg;
+  sys::ProcessStatistics stats;
+  std::optional<sys::ProcessStatistics> optStats(stats);
+  int result = sys::ExecuteAndWait(program, pArgs, std::nullopt,
+                                   /* redirects */ redirects,
+                                   /*SecondsToWait*/ 10, /*MemoryLimit*/ 0,
+                                   &errMsg, &executionFailed, &optStats);
+
+#ifndef _WIN32
   auto maybeOutputFromFile = [&]() -> std::optional<std::string> {
     std::ifstream t(temporaryPathRef.str());
     std::stringstream buffer;
@@ -340,9 +371,9 @@ static std::optional<std::string> runTool(
   if (!maybeOutputFromFile) {
     llvm::errs() << "Failed to open temporary file " << temporaryPathRef.str()
                  << "\n";
-    return {};
   }
   const std::string &outputFromFile = maybeOutputFromFile.value();
+#endif
 
   if (verbose) {
     float totalTime = std::chrono::duration_cast<std::chrono::duration<float>>(
@@ -352,17 +383,21 @@ static std::optional<std::string> runTool(
     llvm::outs() << "\n"
                  << exitStatusStr << " in totalTime " << totalTime
                  << " [s]. Exit code=" << result << "\n";
+#ifndef _WIN32
     llvm::outs() << outputFromFile << "\n";
+#endif
   }
 
-  if (result != 0) {
+  if (result) {
     llvm::errs() << "Failed to run tool: " << program << ". Error: '" << errMsg
-                 << "'\n"
-                 << outputFromFile;
-    return {};
+                 << "'\n";
+#ifndef _WIN32
+    llvm::errs() << outputFromFile;
+#endif
+    return failure();
   }
 
-  return outputFromFile;
+  return success();
 }
 
 static LogicalResult assembleFileUsingChess(
@@ -378,7 +413,7 @@ static LogicalResult assembleFileUsingChess(
   args.emplace_back("-o");
   args.emplace_back(outputFile);
   std::vector<std::string> env = makeChessEnv(vitisDir, npuVersion);
-  if (!runTool(xChessCCExe, args, verbose, env)) {
+  if (failed(runTool(xChessCCExe, args, verbose, env))) {
     llvm::errs() << "Failed to assemble " << inputFile << " with chess";
     return failure();
   }
@@ -435,11 +470,7 @@ static LogicalResult assembleFileUsingPeano(
   args.emplace_back("-o");
   args.emplace_back(outputFile);
   if (verbose) args.emplace_back("-v");
-  if (!runTool((peanoDir / "bin" / "clang").string(), args, verbose)) {
-    llvm::errs() << "Failed to assemble " << outputFile << ".o with peano";
-    return failure();
-  }
-  return success();
+  return runTool((peanoDir / "bin" / "clang").string(), args, verbose);
 }
 
 static_assert(std::is_same_v<decltype(assembleFileUsingPeano),
@@ -583,52 +614,44 @@ static LogicalResult generateCoreElfFiles(
       chessArgs.emplace_back("-o");
       chessArgs.emplace_back(elfFile.string());
       std::vector<std::string> env = makeChessEnv(*vitisDir, npuVersion);
-      if (!runTool(xChessCCExe, chessArgs, verbose, env)) {
-        llvm::errs() << "Failed to link with xbridge";
-        return failure();
-      }
-    } else {
-      Path ldscriptPath = tempDir / (elfFileName + ".ld");
-      {
-        auto ldscriptOutput =
-            openOutputFile(ldscriptPath.string(), &errorMessage);
-        if (!ldscriptOutput) {
-          llvm::errs() << "Failed to open ldscript file because: "
-                       << errorMessage;
-          return failure();
-        }
-        if (failed(mlir::iree_compiler::AMDAIE::AIETranslateToLdScript(
-                deviceOp, ldscriptOutput->os(), col, row))) {
-          llvm::errs() << "failed to generate ld script for core (" << col
-                       << "," << row << ")";
-          return failure();
-        }
-        ldscriptOutput->keep();
-      }
+      return runTool(xChessCCExe, chessArgs, verbose, env);
+    }
 
-      std::string targetLower = StringRef(targetArch).lower();
-      std::vector<std::string> flags;
-      flags.emplace_back(objFile);
-      if (ukernel && (ukernel == "mm" || ukernel == "all")) {
-        flags.emplace_back(mmObjectFilePath->string());
+    Path ldscriptPath = tempDir / (elfFileName + ".ld");
+    {
+      auto ldscriptOutput =
+          openOutputFile(ldscriptPath.string(), &errorMessage);
+      if (!ldscriptOutput) {
+        llvm::errs() << "Failed to open ldscript file because: "
+                     << errorMessage;
+        return failure();
       }
-      flags.emplace_back("--target=" + targetLower + "-none-unknown-elf");
-      flags.emplace_back("-Wl,--gc-sections");
-      flags.emplace_back("-Wl,--orphan-handling=error");
-      flags.emplace_back("-Wl,-T," + ldscriptPath.string());
-      flags.emplace_back("-o");
-      flags.emplace_back(elfFile.string());
-      if (verbose) flags.emplace_back("-v");
-      // we run clang (ie cc) so that libc, libm, crt0/1 paths are injected
-      // automatically into the ld.lld invocation
-      if (!runTool((peanoDir / "bin" / "clang").string(), flags, verbose)) {
-        llvm::errs() << "failed to link elf file for core(" << col << "," << row
-                     << ")";
+      if (failed(mlir::iree_compiler::AMDAIE::AIETranslateToLdScript(
+              deviceOp, ldscriptOutput->os(), col, row))) {
+        llvm::errs() << "failed to generate ld script for core (" << col << ","
+                     << row << ")\n";
         return failure();
       }
+      ldscriptOutput->keep();
     }
+
+    std::string targetLower = StringRef(targetArch).lower();
+    std::vector<std::string> flags;
+    flags.emplace_back(objFile);
+    if (ukernel && (ukernel == "mm" || ukernel == "all")) {
+      flags.emplace_back(mmObjectFilePath->string());
+    }
+    flags.emplace_back("--target=" + targetLower + "-none-unknown-elf");
+    flags.emplace_back("-Wl,--gc-sections");
+    flags.emplace_back("-Wl,--orphan-handling=error");
+    flags.emplace_back("-Wl,-T," + ldscriptPath.string());
+    flags.emplace_back("-o");
+    flags.emplace_back(elfFile.string());
+    if (verbose) flags.emplace_back("-v");
+    // we run clang (ie cc) so that libc, libm, crt0/1 paths are injected
+    // automatically into the ld.lld invocation
+    return runTool((peanoDir / "bin" / "clang").string(), flags, verbose);
   }
-  return success();
 }
 
 static LogicalResult generateCDO(MLIRContext *context, AIE::DeviceOp deviceOp,
@@ -878,75 +901,68 @@ static LogicalResult generateXCLBin(
   FailureOr<Path> xclbinutilBin =
       findAMDAIETool("iree-aie-xclbinutil", amdAIEInstallDir);
 
-  {
-    if (inputXclbin) {
-      // Create aie_partition.json.
-      Path aieInputPartitionJsonFile = tempDir / "aie_input_partition.json";
-      std::string inputPartArg =
-          "AIE_PARTITION:JSON:" + aieInputPartitionJsonFile.string();
-      std::vector<std::string> inputFlags{"--dump-section", inputPartArg,
-                                          "--force", "--input", *inputXclbin};
-
-      if (!succeeded(xclbinutilBin) ||
-          !runTool(xclbinutilBin.value().string(), inputFlags, verbose)) {
-        llvm::errs() << "failed to execute xclbinutil";
-        return failure();
-      }
-      auto aieInputPartitionOut =
-          openInputFile(aieInputPartitionJsonFile.string(), &errorMessage);
-      if (!aieInputPartitionOut) {
-        llvm::errs() << "failed to open aie_input_partition.json because: "
-                     << errorMessage;
-        return failure();
-      }
-      Expected<json::Value> aieInputPartitionOutValue =
-          llvm::json::parse(aieInputPartitionOut->getBuffer());
-      json::Array *aieInputPartionPDIs;
-      aieInputPartionPDIs = aieInputPartitionOutValue->getAsObject()
-                                ->getObject("aie_partition")
-                                ->getArray("PDIs");
-      auto aiePartitionOut =
-          openInputFile(aiePartitionJsonFile.string(), &errorMessage);
-      if (!aiePartitionOut) {
-        llvm::errs() << "failed to open aie aie_input_partition.json for "
-                        "output because: "
-                     << errorMessage;
-        return failure();
-      }
-      llvm::Expected<llvm::json::Value> aiePartitionOutValue =
-          llvm::json::parse(aiePartitionOut->getBuffer());
-      json::Array *aiePartionPDIs;
-      aiePartionPDIs = aiePartitionOutValue->getAsObject()
-                           ->getObject("aie_partition")
-                           ->getArray("PDIs");
-      aieInputPartionPDIs->insert(aieInputPartionPDIs->end(),
-                                  aiePartionPDIs->begin(),
-                                  aiePartionPDIs->end());
-      // rewrite aie partion json file
-      if (auto maybeErr =
-              dumpStrToDisk(formatv("{0:2}", *aieInputPartitionOutValue),
-                            aiePartitionJsonFile.string());
-          maybeErr.has_value()) {
-        llvm::errs()
-            << "failed to dump to disk aie_input_partition.json because: "
-            << errorMessage;
-        return failure();
-      }
-      flags.insert(flags.end(), {"--input", *inputXclbin});
-    } else {
-      flags.insert(flags.end(), {"--add-replace-section", memArg});
-    }
-    flags.insert(flags.end(), {"--add-kernel", kernelsJsonFile.string(),
-                               "--add-replace-section", partArg, "--force",
-                               "--output", std::string(Output)});
+  if (failed(xclbinutilBin)) return xclbinutilBin;
 
-    if (!succeeded(xclbinutilBin) ||
-        !runTool(xclbinutilBin.value().string(), flags, verbose)) {
+  if (inputXclbin) {
+    // Create aie_partition.json.
+    Path aieInputPartitionJsonFile = tempDir / "aie_input_partition.json";
+    std::string inputPartArg =
+        "AIE_PARTITION:JSON:" + aieInputPartitionJsonFile.string();
+    std::vector<std::string> inputFlags{"--dump-section", inputPartArg,
+                                        "--force", "--input", *inputXclbin};
+
+    if (failed(runTool(xclbinutilBin.value().string(), inputFlags, verbose))) {
       llvm::errs() << "failed to execute xclbinutil";
       return failure();
     }
+    auto aieInputPartitionOut =
+        openInputFile(aieInputPartitionJsonFile.string(), &errorMessage);
+    if (!aieInputPartitionOut) {
+      llvm::errs() << "failed to open aie_input_partition.json because: "
+                   << errorMessage;
+      return failure();
+    }
+    Expected<json::Value> aieInputPartitionOutValue =
+        llvm::json::parse(aieInputPartitionOut->getBuffer());
+    json::Array *aieInputPartionPDIs;
+    aieInputPartionPDIs = aieInputPartitionOutValue->getAsObject()
+                              ->getObject("aie_partition")
+                              ->getArray("PDIs");
+    auto aiePartitionOut =
+        openInputFile(aiePartitionJsonFile.string(), &errorMessage);
+    if (!aiePartitionOut) {
+      llvm::errs() << "failed to open aie aie_input_partition.json for "
+                      "output because: "
+                   << errorMessage;
+      return failure();
+    }
+    llvm::Expected<llvm::json::Value> aiePartitionOutValue =
+        llvm::json::parse(aiePartitionOut->getBuffer());
+    json::Array *aiePartionPDIs;
+    aiePartionPDIs = aiePartitionOutValue->getAsObject()
+                         ->getObject("aie_partition")
+                         ->getArray("PDIs");
+    aieInputPartionPDIs->insert(aieInputPartionPDIs->end(),
+                                aiePartionPDIs->begin(), aiePartionPDIs->end());
+    // rewrite aie partion json file
+    if (auto maybeErr =
+            dumpStrToDisk(formatv("{0:2}", *aieInputPartitionOutValue),
+                          aiePartitionJsonFile.string());
+        maybeErr.has_value()) {
+      llvm::errs()
+          << "failed to dump to disk aie_input_partition.json because: "
+          << errorMessage;
+      return failure();
+    }
+    flags.insert(flags.end(), {"--input", *inputXclbin});
+  } else {
+    flags.insert(flags.end(), {"--add-replace-section", memArg});
   }
-  return success();
+  flags.insert(flags.end(), {"--add-kernel", kernelsJsonFile.string(),
+                             "--add-replace-section", partArg, "--force",
+                             "--output", std::string(Output)});
+
+  return runTool(xclbinutilBin.value().string(), flags, verbose);
 }
 
 static std::string chesshack(const std::string &input) {
@@ -1045,13 +1061,16 @@ static LogicalResult generateUnifiedObject(
     llvm::outs() << "\n";
   }
 
-  if (failed(pm.run(moduleOpCopy)))
-    return deviceOp.emitOpError("Failed to lower to LLVM");
+  if (failed(pm.run(moduleOpCopy))) {
+    llvm::errs() << "Failed to lower to LLVM";
+    return failure();
+  }
 
   llvm::LLVMContext llvmContext;
-  auto llvmModule = translateModuleToLLVMIR(moduleOpCopy, llvmContext);
+  auto llvmModule = translateModuleToLLVMIR(moduleOpCopy, llvmContext);
   if (!llvmModule) {
-    return deviceOp.emitOpError("Failed to translate module to LLVMIR");
+    llvm::errs() << "Failed to translate module to LLVMIR";
+    return failure();
   }
 
   std::string inputLLStr;
@@ -1098,18 +1117,18 @@ static LogicalResult generateUnifiedObject(
     std::vector<std::string> peanoArgs = makePeanoOptArgs();
     args.reserve(args.size() + peanoArgs.size());
     args.insert(args.end(), peanoArgs.begin(), peanoArgs.end());
-    if (!runTool(peanoOptBin.string(), args, verbose)) {
+    if (failed(runTool(peanoOptBin.string(), args, verbose))) {
       llvm::errs() << "Failed to optimize ll with peano";
       return failure();
     }
 
-    if (!runTool(
+    if (failed(runTool(
             peanoLLCBin.string(),
             {OptLLVMIRFile.string(), "-O2",
              "--march=" + StringRef(targetArch).lower(), "--function-sections",
              "--filetype=obj", "-o", std::string(outputFile)},
-            verbose)) {
-      llvm::errs() << "Failed to assemble ll with peano";
+            verbose))) {
+      llvm::errs() << "Failed to assemble ll with peano\n";
       return failure();
     }
   }
@@ -1156,8 +1175,10 @@ LogicalResult aie2xclbin(
   PassManager pm(ctx, AIE::DeviceOp::getOperationName());
   applyConfigToPassManager(pm, printIRBeforeAll, printIRAfterAll,
                            printIRModuleScope, timing);
-  if (failed(pm.run(deviceOp)))
-    return deviceOp.emitOpError(": NPU Instruction pipeline failed");
+  if (failed(pm.run(deviceOp))) {
+    llvm::errs() << "NPU Instruction pipeline failed\n";
+    return failure();
+  }
 
   FailureOr<ArrayRef<uint32_t>> maybeNpuInstructions =
       getNpuInstructions(deviceOp);
@@ -1171,7 +1192,7 @@ LogicalResult aie2xclbin(
   auto output = openOutputFile(outputNPU, &errorMessage);
   if (!output) {
     llvm::errs() << "Failed to open npu_instructions.txt for writing because: "
-                 << errorMessage;
+                 << errorMessage << "\n";
     return failure();
   }
   for (uint32_t w : npuInstructions) output->os() << llvm::format("%08X\n", w);
@@ -1181,22 +1202,30 @@ LogicalResult aie2xclbin(
   if (failed(generateUnifiedObject(
           ctx, deviceOp, unifiedObj.string(), printIRBeforeAll, printIRAfterAll,
           printIRModuleScope, timing, useChess, verbose, tempDir, vitisDir,
-          targetArch, peanoDir, npuVersion)))
-    return deviceOp.emitOpError("Failed to generate unified object");
+          targetArch, peanoDir, npuVersion))) {
+    llvm::errs() << "Failed to generate unified object\n";
+    return failure();
+  }
 
   if (failed(generateCoreElfFiles(deviceOp, unifiedObj.string(), tempDir,
                                   useChess, vitisDir, targetArch, verbose,
-                                  peanoDir, npuVersion, ukernel)))
-    return deviceOp.emitOpError("Failed to generate core ELF file(s)");
+                                  peanoDir, npuVersion, ukernel))) {
+    llvm::errs() << "Failed to generate core ELF file(s)\n";
+    return failure();
+  }
 
   if (failed(generateCDO(ctx, deviceOp, printIRBeforeAll, printIRAfterAll,
-                         printIRModuleScope, timing, tempDir)))
-    return deviceOp.emitOpError("Failed to generate CDO");
+                         printIRModuleScope, timing, tempDir))) {
+    llvm::errs() << "Failed to generate CDO\n";
+    return failure();
+  }
 
   if (failed(generateXCLBin(outputXCLBin, tempDir, xclBinKernelID,
                             xclBinKernelName, xclBinInstanceName,
-                            amdAIEInstallDir, verbose, InputXCLBin)))
-    return deviceOp.emitOpError("Failed to generate XCLBin");
+                            amdAIEInstallDir, verbose, InputXCLBin))) {
+    llvm::errs() << "Failed to generate XCLBin\n";
+    return failure();
+  }
 
   return success();
 }
diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
index 44773bfdf..0e740d1d5 100644
--- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
+++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
@@ -213,6 +213,16 @@ LogicalResult addElfToTile(const AMDAIEDeviceModel &deviceModel,
                            const TileLoc &tileLoc, const Path &elfPath,
                            bool aieSim) {
   auto devInst = const_cast<XAie_DevInst *>(&deviceModel.devInst);
+  // this isn't the case elsewhere but for whatever reason
+  // fopen (what XAie_LoadElf ultimately calls) braeks for >=256
+#ifdef _WIN32
+  if (elfPath.string().size() >= 256) {
+    llvm::errs() << "Windows paths must be less than 256 chars for elf loading "
+                    "to work (seriously):"
+                 << elfPath.string() << "\n";
+    return failure();
+  }
+#endif
   TRY_XAIE_API_LOGICAL_RESULT(XAie_LoadElf, devInst, tileLoc,
                               elfPath.string().c_str(),
                               /*loadSym*/ aieSim);
diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_runtime.h b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_runtime.h
index c485bb497..774549862 100644
--- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_runtime.h
+++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_runtime.h
@@ -414,7 +414,7 @@ static_assert(XAIE_OK == 0);
     LLVM_DEBUG(llvm::dbgs().flush());                                   \
     if (auto r = API(__VA_ARGS__))                                      \
       llvm::report_fatal_error(llvm::Twine(#API " failed with ") +      \
-                               to_string(r));                           \
+                               to_string(r) + "\n");                    \
   } while (0)
 
 #define TRY_XAIE_API_LOGICAL_RESULT(API, ...)                           \
@@ -424,7 +424,7 @@ static_assert(XAIE_OK == 0);
     LLVM_DEBUG(llvm::dbgs() << "\n");                                   \
     LLVM_DEBUG(llvm::dbgs().flush());                                   \
     if (auto r = API(__VA_ARGS__)) {                                    \
-      llvm::errs() << #API " failed with " << r;                        \
+      llvm::errs() << #API " failed with " << r << "\n";                \
       return failure();                                                 \
     }                                                                   \
   } while (0)

From c4a3933c4c43e08fbebbbba1d3853d7276bdcf1a Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Tue, 20 Aug 2024 20:46:42 -0500
Subject: [PATCH 03/28] works with the closed source shim

---
 build_tools/ci/cpu_comparison/run_test.py     | 14 +++--
 cmake/iree_aie_xrt.cmake                      | 55 +++++++++++--------
 .../iree-amd-aie/Target/CMakeLists.txt        |  2 +-
 .../driver/xrt/native_executable.cc           | 13 ++++-
 .../src/iree-amd-aie/driver/xrt/xrt_driver.cc | 16 ++++--
 5 files changed, 62 insertions(+), 38 deletions(-)

diff --git a/build_tools/ci/cpu_comparison/run_test.py b/build_tools/ci/cpu_comparison/run_test.py
index ff8faa090..291399fb1 100755
--- a/build_tools/ci/cpu_comparison/run_test.py
+++ b/build_tools/ci/cpu_comparison/run_test.py
@@ -60,7 +60,7 @@ def find_executable(install_dir: Path, executable_name):
     )
 
 
-def shell_out(cmd: list, workdir=None, verbose=False, raiseOnError=True):
+def shell_out(cmd: list, workdir=None, verbose: int = 0, raise_on_error=True):
     if workdir is None:
         workdir = Path.cwd()
     if not isinstance(cmd, list):
@@ -70,7 +70,9 @@ def shell_out(cmd: list, workdir=None, verbose=False, raiseOnError=True):
             cmd[i] = str(c)
     env = os.environ
     if verbose:
-        _cmd = " ".join([f"{k}={v}" for k, v in env.items()]) + " " + " ".join(cmd)
+        _cmd = " ".join(cmd)
+        if verbose > 1:
+            _cmd = " ".join([f"{k}={v}" for k, v in env.items()]) + " " + _cmd
         print(f"Running the following command:\n{_cmd}")
 
     handle = subprocess.run(cmd, capture_output=True, cwd=workdir, env=env)
@@ -83,11 +85,11 @@ def shell_out(cmd: list, workdir=None, verbose=False, raiseOnError=True):
         if stderr_decode:
             print("Standard error from script:")
             print(stderr_decode)
-    if not raiseOnError and handle.returncode != 0:
+    if not raise_on_error and handle.returncode != 0:
         print(
             f"Error executing script, error code was {handle.returncode}. Not raising an error."
         )
-    if raiseOnError and handle.returncode != 0:
+    if raise_on_error and handle.returncode != 0:
         raise RuntimeError(
             f"Error executing script, error code was {handle.returncode}"
         )
@@ -332,7 +334,7 @@ def __init__(
         peano_clang_path = peano_dir / "bin" / "clang"
         if peano_clang_path.exists():
             _, clang_v_output = shell_out(
-                [peano_clang_path, "-v"], verbose=self.verbose, raiseOnError=False
+                [peano_clang_path, "-v"], verbose=self.verbose, raise_on_error=False
             )
             peano_commit_hash = re.findall(
                 r"clang version \d+\.\d+\.\d+ \(https://github.com/Xilinx/llvm-aie (\w+)\)",
@@ -812,7 +814,7 @@ def all_tests(
         ),
     )
 
-    parser.add_argument("--verbose", action="store_true")
+    parser.add_argument('-v', '--verbose', action='count', default=0)
 
     parser.add_argument(
         "--reset-npu-between-runs",
diff --git a/cmake/iree_aie_xrt.cmake b/cmake/iree_aie_xrt.cmake
index 5879e21cd..eb9249297 100644
--- a/cmake/iree_aie_xrt.cmake
+++ b/cmake/iree_aie_xrt.cmake
@@ -4,7 +4,7 @@
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-if(TARGET iree_aie_xrt_iree-aie-xclbinutil)
+if(TARGET iree-aie-xclbinutil)
   return()
 endif()
 
@@ -56,6 +56,14 @@ set(_xclbinutil_source_dir ${IREE_XRT_SOURCE_DIR}/runtime_src/tools/xclbinutil)
 # remove ssl dep
 replace_string_in_file(${_xclbinutil_source_dir}/XclBinUtilMain.cxx
                        "bValidateSignature == true" "false")
+# returning string& to an iterator...............
+replace_string_in_file(${_xclbinutil_source_dir}/SectionAIEResourcesBin.h
+                       "static const std::string& getSubSectionName" "static std::string getSubSectionName")
+set(_const_str "
+const std::string&
+SectionAIEResourcesBin::getSubSectionName")
+replace_string_in_file(${_xclbinutil_source_dir}/SectionAIEResourcesBin.cxx
+                       "${_const_str}" "std::string SectionAIEResourcesBin::getSubSectionName")
 
 # transformcdo target
 if(NOT WIN32)
@@ -134,42 +142,43 @@ list(REMOVE_ITEM _xclbinutil_srcs "${_xclbinutil_source_dir}/SectionSmartNic.cxx
 # and then --add-replace-section:MEM_TOPOLOGY won't work...
 # XRT/src/runtime_src/tools/xclbinutil/SectionMemTopology.cxx#L26-L41
 # TODO(max): and for whatever reason -WL,--whole-archive doesn't work
-set(IREE_PACKAGE_ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}")
-set(IREE_PACKAGE_ROOT_PREFIX "iree::aie::xrt")
-iree_cc_binary(
-  NAME
-    # if you rename this be sure to update the if(...) return up top
-    # otherwise this script will be entered twice and you'll get a confusing error like
-    # "can't do add_executable; target already exists"
-    iree-aie-xclbinutil
-  SRCS
-    ${_xclbinutil_srcs}
-  COPTS
-    $<$<PLATFORM_ID:Linux>:-fexceptions -frtti>
-    $<$<PLATFORM_ID:Windows>:/EHsc /GR>
-  DEFINES
-    BOOST_BIND_GLOBAL_PLACEHOLDERS
-  INSTALL_COMPONENT
-    IREETools-Runtime
-  PUBLIC
-)
+add_executable(iree-aie-xclbinutil ${_xclbinutil_srcs})
 
+target_compile_definitions(iree-aie-xclbinutil
+                           PRIVATE
+                           -DBOOST_BIND_GLOBAL_PLACEHOLDERS)
 set(THREADS_PREFER_PTHREAD_FLAG ON)
-target_link_libraries(iree_aie_xrt_iree-aie-xclbinutil
+target_link_libraries(iree-aie-xclbinutil
                       PRIVATE
                       Threads::Threads
                       $<BUILD_LOCAL_INTERFACE:${IREE_AIE_BOOST_LIBS}>
                       $<$<PLATFORM_ID:Linux>:$<BUILD_LOCAL_INTERFACE:transformcdo>>)
-target_include_directories(iree_aie_xrt_iree-aie-xclbinutil
+target_include_directories(iree-aie-xclbinutil
                            PRIVATE ${XRT_BINARY_DIR}/gen
                                    ${IREE_XRT_SOURCE_DIR}/runtime_src/core/include
                                    ${_xclbinutil_source_dir})
-set_target_properties(iree_aie_xrt_iree-aie-xclbinutil
+target_compile_options(iree-aie-xclbinutil
+                       PRIVATE
+                       $<$<PLATFORM_ID:Linux>:-fexceptions -frtti>
+                       $<$<PLATFORM_ID:Windows>:/EHsc /GR>)
+set_target_properties(iree-aie-xclbinutil
                       PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/tools")
 
+# iree_install_targets has EXCLUDE_FROM_ALL
+install(
+  TARGETS iree-aie-xclbinutil
+  EXPORT IREEExported-Runtime
+  COMPONENT IREETools-Runtime
+  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+)
+
+
 # ##############################################################################
 # xrt_coreutil
 # ##############################################################################
+set(XRT_AIE_BUILD "yes")
+set(XRT_ENABLE_AIE "yes")
+add_definitions(-DXRT_ENABLE_AIE -DXRT_AIE_BUILD)
 
 # send xrt_coreutil to trash so it doesn't get installed
 set(XRT_INSTALL_LIB_DIR "$ENV{TMP}")
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt
index 1d9e94123..3c7cd4d64 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt
@@ -21,7 +21,7 @@ iree_cc_library(
 )
 
 if(IREE_AMD_AIE_ENABLE_XRT_DRIVER)
-  add_dependencies(iree_target_amd-aie_Target_AIETargets iree_aie_xrt_iree-aie-xclbinutil)
+  add_dependencies(iree_target_amd-aie_Target_AIETargets iree-aie-xclbinutil)
 endif()
 
 iree_cc_library(
diff --git a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
index 8ae44b877..2bdb50a11 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
@@ -178,6 +178,7 @@ iree_status_t iree_hal_xrt_native_executable_create(
     std::vector<char> xclbinVector(
         xclbin_fb, xclbin_fb + flatbuffers_string_len(xclbin_fb));
     std::unique_ptr<xrt::xclbin> xclbin;
+    std::cerr << "XILINX_XRT: " << getenv("XILINX_XRT") << "\n";
     try {
       xclbin = std::make_unique<xrt::xclbin>(xclbinVector);
     } catch (std::runtime_error& e) {
@@ -185,6 +186,12 @@ iree_status_t iree_hal_xrt_native_executable_create(
                               e.what());
     }
     device->register_xclbin(*xclbin);
+    try {
+      xrt::hw_context context(*device, xclbin->get_uuid());
+    } catch (std::runtime_error& e) {
+      return iree_make_status(IREE_STATUS_INTERNAL, "xrt::hw_context context: %s",
+                              e.what());
+    }
     xrt::hw_context context(*device, xclbin->get_uuid());
     uint32_t asm_instr_index =
         flatbuffers_uint32_vec_at(asm_instr_indices_vec, entry_ordinal);
@@ -202,7 +209,7 @@ iree_status_t iree_hal_xrt_native_executable_create(
       // the second argument to the kernel and we can use group id 1.
       int group_id = 1;
       instr = std::make_unique<xrt::bo>(*device, num_instr * sizeof(uint32_t),
-                                        XCL_BO_FLAGS_CACHEABLE, group_id);
+                                        XCL_BO_FLAGS_CACHEABLE, kernel->group_id(group_id));
     } catch (...) {
       iree_hal_executable_destroy((iree_hal_executable_t*)executable);
       IREE_TRACE_ZONE_END(z0);
@@ -267,8 +274,8 @@ static void iree_hal_xrt_native_executable_destroy(
 
   for (iree_host_size_t i = 0; i < executable->entry_point_count; ++i) {
     try {
-      delete executable->entry_points[i].kernel;
-      delete executable->entry_points[i].instr;
+      // delete executable->entry_points[i].kernel;
+      // delete executable->entry_points[i].instr;
       // TODO(jornt): deleting the xclbin here will result in a corrupted size
       // error in XRT. It looks like the xclbin needs to stay alive while the
       // device is alive if it has been registered.
diff --git a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
index b9c8aaacd..42b8adbd3 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
@@ -41,7 +41,7 @@ typedef struct iree_hal_xrt_driver_t {
   // Parameters used to control device behavior.
   iree_hal_xrt_device_params_t device_params;
 
-  xrt::device *device;
+  xrt::device* device;
 
 } iree_hal_xrt_driver_t;
 
@@ -92,7 +92,13 @@ iree_status_t iree_hal_xrt_driver_create_internal(
                             "No XRT devices found");
   }
   // Get handle to xrt device
-  global_device = xrt::device(0);
+  std::cerr << xrt::system::enumerate_devices() << "\n";
+  try {
+    global_device = xrt::device(0);
+  } catch (std::runtime_error& e) {
+    return iree_make_status(IREE_STATUS_INTERNAL, "xrt::device(0) failed: %s",
+                            e.what());
+  }
   driver->device = &global_device;
   *out_driver = (iree_hal_driver_t*)driver;
   return iree_ok_status();
@@ -128,7 +134,7 @@ static iree_status_t iree_hal_xrt_driver_dump_device_info(
     iree_hal_driver_t* base_driver, iree_hal_device_id_t device_id,
     iree_string_builder_t* builder) {
   iree_hal_xrt_driver_t* driver = iree_hal_xrt_driver_cast(base_driver);
-  xrt::device *device = driver->device;
+  xrt::device* device = driver->device;
   IREE_RETURN_IF_ERROR(
       iree_string_builder_append_cstring(builder, "\n- Platform:"));
 
@@ -147,7 +153,7 @@ static iree_status_t iree_hal_xrt_driver_dump_device_info(
 // |out_device_info| must point to valid memory and additional data will be
 // appended to |buffer_ptr| and the new pointer is returned.
 static iree_status_t iree_hal_xrt_populate_device_info(
-    xrt::device *device, uint8_t* buffer_ptr, uint8_t** out_buffer_ptr,
+    xrt::device* device, uint8_t* buffer_ptr, uint8_t** out_buffer_ptr,
     iree_hal_device_info_t* out_device_info) {
   *out_buffer_ptr = buffer_ptr;
 
@@ -183,7 +189,7 @@ static iree_status_t iree_hal_xrt_driver_query_available_devices(
     iree_host_size_t* out_device_info_count,
     iree_hal_device_info_t** out_device_infos) {
   iree_hal_xrt_driver_t* driver = iree_hal_xrt_driver_cast(base_driver);
-  xrt::device *device = driver->device;
+  xrt::device* device = driver->device;
   // Allocate the return infos and populate with the devices.
   iree_hal_device_info_t* device_infos = NULL;
   iree_host_size_t single_info_size =

From ce332c3898bfd92fec49917b8f8f6c890d0d6f9d Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Wed, 21 Aug 2024 10:18:03 -0500
Subject: [PATCH 04/28] fork XRT

---
 .gitmodules                                   |  3 +-
 cmake/iree_aie_xrt.cmake                      | 36 +++----------------
 .../driver/xrt/native_executable.cc           |  8 ++---
 third_party/XRT                               |  2 +-
 4 files changed, 12 insertions(+), 37 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 11197c42f..235562936 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,7 +1,8 @@
 [submodule "third_party/XRT"]
 	path = third_party/XRT
-	url = https://github.com/Xilinx/XRT.git
+	url = https://github.com/nod-ai/XRT.git
 	shallow = true
+	branch = iree-amd-aie-patches
 [submodule "third_party/mlir-air"]
 	path = third_party/mlir-air
 	url = https://github.com/nod-ai/mlir-air.git
diff --git a/cmake/iree_aie_xrt.cmake b/cmake/iree_aie_xrt.cmake
index eb9249297..cfe683852 100644
--- a/cmake/iree_aie_xrt.cmake
+++ b/cmake/iree_aie_xrt.cmake
@@ -53,18 +53,6 @@ set(IREE_XRT_SOURCE_DIR "${IREE_AMD_AIE_SOURCE_DIR}/third_party/XRT/src")
 
 set(_xclbinutil_source_dir ${IREE_XRT_SOURCE_DIR}/runtime_src/tools/xclbinutil)
 
-# remove ssl dep
-replace_string_in_file(${_xclbinutil_source_dir}/XclBinUtilMain.cxx
-                       "bValidateSignature == true" "false")
-# returning string& to an iterator...............
-replace_string_in_file(${_xclbinutil_source_dir}/SectionAIEResourcesBin.h
-                       "static const std::string& getSubSectionName" "static std::string getSubSectionName")
-set(_const_str "
-const std::string&
-SectionAIEResourcesBin::getSubSectionName")
-replace_string_in_file(${_xclbinutil_source_dir}/SectionAIEResourcesBin.cxx
-                       "${_const_str}" "std::string SectionAIEResourcesBin::getSubSectionName")
-
 # transformcdo target
 if(NOT WIN32)
   replace_string_in_file(${_xclbinutil_source_dir}/aie-pdi-transform/src/CMakeLists.txt
@@ -74,7 +62,7 @@ endif()
 
 # otherwise the various stois that read these will explode...
 # XRT/src/runtime_src/tools/xclbinutil/XclBinClass.cxx#L55
-file(READ ${IREE_XRT_SOURCE_DIR}/CMakeLists.txt _xrt_cmake_file_contents)
+file(READ ${IREE_XRT_SOURCE_DIR}/CMake/settings.cmake _xrt_cmake_file_contents)
 string(REGEX MATCH "XRT_VERSION_MAJOR ([0-9]+)" XRT_VERSION_MAJOR ${_xrt_cmake_file_contents})
 # note CMAKE_MATCH_0 is the whole match...
 set(XRT_VERSION_MAJOR ${CMAKE_MATCH_1})
@@ -95,24 +83,6 @@ configure_file(${IREE_XRT_SOURCE_DIR}/CMake/config/version.h.in
                ${IREE_XRT_SOURCE_DIR}/runtime_src/core/common/gen/version.h)
 configure_file(${IREE_XRT_SOURCE_DIR}/CMake/config/version.h.in
                ${IREE_XRT_SOURCE_DIR}/runtime_src/core/common/api/version.h)
-replace_string_in_file(${IREE_XRT_SOURCE_DIR}/runtime_src/core/common/query.h
-                       "#include <stdexcept>" "#include <any>")
-
-set(_noop_xclbin_sig_cxx "
-#include \"XclBinSignature.h\"
-void signXclBinImage(const std::string& _fileOnDisk,
-                     const std::string& _sPrivateKey,
-                     const std::string& _sCertificate,
-                     const std::string& _sDigestAlgorithm,
-                     bool _bEnableDebugOutput) {}
-void verifyXclBinImage(const std::string& _fileOnDisk,
-                       const std::string& _sCertificate,
-                       bool _bEnableDebugOutput) {}
-void dumpSignatureFile(const std::string& _fileOnDisk,
-                       const std::string& _signatureFile) {}
-void getXclBinPKCSStats(const std::string& _xclBinFile,
-                        XclBinPKCSImageStats& _xclBinPKCSImageStats) {}")
-file(WRITE "${_xclbinutil_source_dir}/XclBinSignature.cxx" "${_noop_xclbin_sig_cxx}")
 
 file(
   GLOB
@@ -176,8 +146,12 @@ install(
 # ##############################################################################
 # xrt_coreutil
 # ##############################################################################
+
+message(STATUS "building XRT core libs")
+
 set(XRT_AIE_BUILD "yes")
 set(XRT_ENABLE_AIE "yes")
+set(XRT_NATIVE_BUILD "yes")
 add_definitions(-DXRT_ENABLE_AIE -DXRT_AIE_BUILD)
 
 # send xrt_coreutil to trash so it doesn't get installed
diff --git a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
index 2bdb50a11..c5da9f217 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
@@ -178,7 +178,6 @@ iree_status_t iree_hal_xrt_native_executable_create(
     std::vector<char> xclbinVector(
         xclbin_fb, xclbin_fb + flatbuffers_string_len(xclbin_fb));
     std::unique_ptr<xrt::xclbin> xclbin;
-    std::cerr << "XILINX_XRT: " << getenv("XILINX_XRT") << "\n";
     try {
       xclbin = std::make_unique<xrt::xclbin>(xclbinVector);
     } catch (std::runtime_error& e) {
@@ -189,8 +188,8 @@ iree_status_t iree_hal_xrt_native_executable_create(
     try {
       xrt::hw_context context(*device, xclbin->get_uuid());
     } catch (std::runtime_error& e) {
-      return iree_make_status(IREE_STATUS_INTERNAL, "xrt::hw_context context: %s",
-                              e.what());
+      return iree_make_status(IREE_STATUS_INTERNAL,
+                              "xrt::hw_context context: %s", e.what());
     }
     xrt::hw_context context(*device, xclbin->get_uuid());
     uint32_t asm_instr_index =
@@ -209,7 +208,8 @@ iree_status_t iree_hal_xrt_native_executable_create(
       // the second argument to the kernel and we can use group id 1.
       int group_id = 1;
       instr = std::make_unique<xrt::bo>(*device, num_instr * sizeof(uint32_t),
-                                        XCL_BO_FLAGS_CACHEABLE, kernel->group_id(group_id));
+                                        XCL_BO_FLAGS_CACHEABLE,
+                                        kernel->group_id(group_id));
     } catch (...) {
       iree_hal_executable_destroy((iree_hal_executable_t*)executable);
       IREE_TRACE_ZONE_END(z0);
diff --git a/third_party/XRT b/third_party/XRT
index 8d070495d..a9fdf618c 160000
--- a/third_party/XRT
+++ b/third_party/XRT
@@ -1 +1 @@
-Subproject commit 8d070495d092a2e773f2360cbff4fa29138da67d
+Subproject commit a9fdf618ceba32d28bbf6715a5ee627a51a74b24

From aae1aa459e68dbb9818ac5a11a0b18c7ee3a84e0 Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Wed, 21 Aug 2024 14:30:08 -0500
Subject: [PATCH 05/28] install xrt_coreutil on windows right next to run-module

---
 cmake/iree_aie_xrt.cmake | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/cmake/iree_aie_xrt.cmake b/cmake/iree_aie_xrt.cmake
index cfe683852..47db2979d 100644
--- a/cmake/iree_aie_xrt.cmake
+++ b/cmake/iree_aie_xrt.cmake
@@ -139,8 +139,7 @@ install(
   TARGETS iree-aie-xclbinutil
   EXPORT IREEExported-Runtime
   COMPONENT IREETools-Runtime
-  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
-)
+  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 
 
 # ##############################################################################
@@ -169,8 +168,8 @@ set(_core_libs
     core_common_library_objects
     core_common_api_library_objects
     core_common_xdp_profile_objects
-    xrt_coreutil
-)
+    xrt_coreutil)
+
 foreach(_core_lib IN LISTS _core_libs)
   target_include_directories(${_core_lib} PUBLIC
                              ${IREE_XRT_SOURCE_DIR}/runtime_src/core/include
@@ -185,3 +184,10 @@ foreach(_core_lib IN LISTS _core_libs)
                          $<$<PLATFORM_ID:Windows>:/EHsc /GR>)
   target_link_libraries(${_core_lib} PUBLIC $<BUILD_LOCAL_INTERFACE:${IREE_AIE_BOOST_LIBS}>)
 endforeach()
+if (WIN32)
+  install(
+    TARGETS xrt_coreutil
+    EXPORT IREEExported-Runtime
+    COMPONENT IREETools-Runtime
+    LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR})
+endif()

From 85e2c798d2691a787d72b6c87192a6b6b33b45df Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Wed, 21 Aug 2024 18:25:49 -0500
Subject: [PATCH 06/28] clean up

---
 build_tools/ci/build_test_cpp.sh              | 10 +++
 build_tools/ci/cpu_comparison/run_test.py     | 79 +++++++++----------
 cmake/iree_aie_bootgen.cmake                  |  5 +-
 cmake/iree_aie_xrt.cmake                      |  3 +-
 .../aievec/VectorToAIEVecConversions.cpp      |  8 --
 .../AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp   | 20 +----
 .../Target/AMDAIETargetCDODirect.cpp          |  4 +-
 .../AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 51 ++++++------
 .../AMDAIELocalizeLogicalObjectFifo.cpp       |  5 --
 .../aie_runtime/iree_aie_configure.cc         | 15 ++--
 .../aie_runtime/iree_aie_configure.h          | 10 +--
 .../src/iree-amd-aie/driver/xrt/xrt_driver.cc |  3 +-
 12 files changed, 96 insertions(+), 117 deletions(-)

diff --git a/build_tools/ci/build_test_cpp.sh b/build_tools/ci/build_test_cpp.sh
index 53d190f0f..f8fa87ba4 100644
--- a/build_tools/ci/build_test_cpp.sh
+++ b/build_tools/ci/build_test_cpp.sh
@@ -32,6 +32,9 @@ if [[ "$OSTYPE" == "linux-gnu"* ]]; then
   export CMAKE_TOOLCHAIN_FILE="$this_dir/linux_default_toolchain.cmake"
   export CC=clang
   export CXX=clang++
+else
+  export CC=clang-cl.exe
+  export CXX=clang-cl.exe
 fi
 export CCACHE_DIR="${cache_dir}/ccache"
 export CCACHE_MAXSIZE="700M"
@@ -63,6 +66,13 @@ CMAKE_ARGS="\
   -DCMAKE_BUILD_TYPE=Release \
   -DCMAKE_INSTALL_PREFIX=$install_dir \
   -DCMAKE_INSTALL_LIBDIR=lib \
+  -DCMAKE_EXE_LINKER_FLAGS_INIT="-fuse-ld=lld" \
+  -DCMAKE_SHARED_LINKER_FLAGS_INIT="-fuse-ld=lld" \
+  -DCMAKE_MODULE_LINKER_FLAGS_INIT="-fuse-ld=lld" \
+  -DCMAKE_C_COMPILER="${CC}" \
+  -DCMAKE_CXX_COMPILER="${CXX}" \
+  -DLLVM_TARGET_ARCH=X86 \
+  -DLLVM_TARGETS_TO_BUILD=X86 \
   -DIREE_ENABLE_ASSERTIONS=ON \
   -DIREE_BUILD_SAMPLES=OFF \
   -DIREE_BUILD_PYTHON_BINDINGS=ON \
diff --git a/build_tools/ci/cpu_comparison/run_test.py b/build_tools/ci/cpu_comparison/run_test.py
index 291399fb1..0b87b9aa1 100755
--- a/build_tools/ci/cpu_comparison/run_test.py
+++ b/build_tools/ci/cpu_comparison/run_test.py
@@ -60,7 +60,7 @@ def find_executable(install_dir: Path, executable_name):
     )
 
 
-def shell_out(cmd: list, workdir=None, verbose: int = 0, raise_on_error=True):
+def shell_out(cmd: list, workdir=None, verbose: int = 0, raise_on_error=True, env=None):
     if workdir is None:
         workdir = Path.cwd()
     if not isinstance(cmd, list):
@@ -68,7 +68,11 @@ def shell_out(cmd: list, workdir=None, verbose: int = 0, raise_on_error=True):
     for i, c in enumerate(cmd):
         if isinstance(c, Path):
             cmd[i] = str(c)
-    env = os.environ
+    if env is None:
+        env = {}
+
+    env = {**env, **os.environ}
+
     if verbose:
         _cmd = " ".join(cmd)
         if verbose > 1:
@@ -228,20 +232,20 @@ class TestConfig:
     """
 
     def __init__(
-        self,
-        output_dir,
-        iree_install_dir,
-        peano_dir,
-        xrt_dir,
-        vitis_dir,
-        file_dir,
-        iree_compile_exe,
-        iree_run_exe,
-        verbose,
-        return_on_fail,
-        reset_npu_between_runs,
-        do_not_run_aie,
-        additional_aie_compilation_flags,
+            self,
+            output_dir,
+            iree_install_dir,
+            peano_dir,
+            xrt_dir,
+            vitis_dir,
+            file_dir,
+            iree_compile_exe,
+            iree_run_exe,
+            verbose,
+            return_on_fail,
+            reset_npu_between_runs,
+            do_not_run_aie,
+            additional_aie_compilation_flags,
     ):
         self.output_dir = output_dir
         self.iree_install_dir = iree_install_dir
@@ -263,18 +267,6 @@ def __init__(
         self.xrt_hash = "undetermined"
         self.xrt_release = "undetermined"
         self.peano_commit_hash = "undetermined"
-        xrt_bin_dir = xrt_dir
-        if platform.system() != "Windows":
-            xrt_bin_dir /= "bin"
-        xrt_smi_exe = xrt_bin_dir / (
-            "xrt-smi" + ".exe" if platform.system() == "Windows" else ""
-        )
-        if not xrt_smi_exe.exists():
-            xrt_smi_exe = xrt_bin_dir / (
-                "xbutil" + ".exe" if platform.system() == "Windows" else ""
-            )
-        if not xrt_smi_exe.exists():
-            raise RuntimeError(f"Neither xrt-smi nor xbutil found in {xrt_bin_dir}")
 
         self.reset_npu_script = file_dir.parent / "reset_npu.sh"
         if reset_npu_between_runs and not self.reset_npu_script.exists():
@@ -282,6 +274,24 @@ def __init__(
                 f"The file {self.reset_npu_script} does not exist, and reset_npu_script=True"
             )
 
+        # Populated at runtime
+        self.failures = []
+
+        if not isinstance(self.verbose, bool) and not isinstance(self.verbose, int):
+            raise ValueError(
+                f"verbose must be a boolean or integer, not {type(verbose)}"
+            )
+
+        if not get_component_log:
+            return
+
+        xrt_bin_dir = xrt_dir / "bin"
+        xrt_smi_exe = xrt_bin_dir / "xrt-smi"
+        if not xrt_smi_exe.exists():
+            xrt_smi_exe = xrt_bin_dir / "xbutil"
+        if not xrt_smi_exe.exists():
+            raise RuntimeError(f"Neither xrt-smi nor xbutil found in {xrt_bin_dir}")
+
         # Get the string output of the xrt-smi 'examine' command. Expect the
         # string to look something like:
         #
@@ -330,7 +340,6 @@ def __init__(
 
         # Try and get the peano commit hash. This is a bit of a hack, if it fails
         # peano_commit_has is left as "undetermined".
-        self.peano_commit_hash = "undetermined"
         peano_clang_path = peano_dir / "bin" / "clang"
         if peano_clang_path.exists():
             _, clang_v_output = shell_out(
@@ -344,14 +353,6 @@ def __init__(
             if peano_commit_hash:
                 self.peano_commit_hash = peano_commit_hash[0]
 
-        # Populated at runtime
-        self.failures = []
-
-        if not isinstance(self.verbose, bool) and not isinstance(self.verbose, int):
-            raise ValueError(
-                f"verbose must be a boolean or integer, not {type(verbose)}"
-            )
-
     def __str__(self):
         return dedent(
             f"""
@@ -771,8 +772,6 @@ def all_tests(
         partition = map_to_partition[test]
         partition.run(config)
 
-    # for p in partition:
-
     if config.failures:
         # Convert the list of failed tests into a map: test name to the
         # number of failures (config.failures list may contain duplicates)
@@ -814,7 +813,7 @@ def all_tests(
         ),
     )
 
-    parser.add_argument('-v', '--verbose', action='count', default=0)
+    parser.add_argument("-v", "--verbose", action="count", default=0)
 
     parser.add_argument(
         "--reset-npu-between-runs",
diff --git a/cmake/iree_aie_bootgen.cmake b/cmake/iree_aie_bootgen.cmake
index 526ed9a6f..11238be4c 100644
--- a/cmake/iree_aie_bootgen.cmake
+++ b/cmake/iree_aie_bootgen.cmake
@@ -26,9 +26,10 @@ replace_string_in_file("${_BOOTGEN_SOURCE_DIR}/main.cpp"
 file(GLOB _bootgen_sources "${_BOOTGEN_SOURCE_DIR}/*.c" "${_BOOTGEN_SOURCE_DIR}/*.cpp")
 add_library(iree-aie-bootgen STATIC ${_bootgen_sources})
 
-if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+if(WIN32)
   target_compile_definitions(iree-aie-bootgen PUBLIC YY_NO_UNISTD_H)
-elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU")
+endif()
+if(CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU")
   set(_bootgen_c_warning_ignores
       -Wno-cast-qual
       -Wno-covered-switch-default
diff --git a/cmake/iree_aie_xrt.cmake b/cmake/iree_aie_xrt.cmake
index 47db2979d..99539ebf3 100644
--- a/cmake/iree_aie_xrt.cmake
+++ b/cmake/iree_aie_xrt.cmake
@@ -27,7 +27,8 @@ FetchContent_Declare(
   GIT_PROGRESS TRUE
   DOWNLOAD_NO_EXTRACT FALSE
   # prevents configure from rerunning all the time
-  URL_HASH MD5=84bc7c861606dc66bcfbeb660fcddfd2)
+  DOWNLOAD_EXTRACT_TIMESTAMP TRUE
+  URL_HASH MD5=84BC7C861606DC66BCFBEB660FCDDFD2)
 FetchContent_MakeAvailable(Boost)
 set(IREE_AIE_BOOST_LIBS
     any
diff --git a/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp b/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp
index d2a6e4e49..12769f2c9 100644
--- a/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp
+++ b/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp
@@ -840,14 +840,6 @@ static void configureAIEVecCommonLegalizations(ConversionTarget &target) {
       [](arith::SubFOp op) { return !isa<VectorType>(op.getType()); });
 }
 
-static void configureAIEVecV1Legalizations(ConversionTarget &target) {
-  target.addDynamicallyLegalOp<arith::MulIOp>(
-      [](arith::MulIOp op) { return !isa<VectorType>(op.getType()); });
-  target.addDynamicallyLegalOp<arith::MulFOp>(
-      [](arith::MulFOp op) { return !isa<VectorType>(op.getType()); });
-  target.addLegalDialect<memref::MemRefDialect>();
-}
-
 static void configureAIEVecV2Legalizations(ConversionTarget &target) {
   target.addLegalOp<UnrealizedConversionCastOp>();
   target.addLegalOp<vector::ShapeCastOp>();
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp
index c746c0877..ff5ca0da8 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp
@@ -18,20 +18,6 @@
 
 static const char kPackingConfigAttrName[] = "packing_config";
 
-namespace mlir::iree_compiler {
-
-/// Returns an `ArrayAttr` where each element is an `IntegerAttr` of 64-bit
-/// integer type whose values is obtained from `values`.
-static ArrayAttr getIndexArrayAttr(MLIRContext *context,
-                                   ArrayRef<int64_t> values) {
-  return ArrayAttr::get(
-      context, llvm::map_to_vector(values, [&](int64_t value) -> Attribute {
-        return IntegerAttr::get(IndexType::get(context), APInt(64, value));
-      }));
-}
-
-}  // namespace mlir::iree_compiler
-
 namespace mlir::iree_compiler::AMDAIE {
 
 //===----------------------------------------------------------------------===//
@@ -73,6 +59,7 @@ void AMDAIEDialect::initializeAMDAIEAttrs() {
   addAttributes<
 #define GET_ATTRDEF_LIST
 #include "iree-amd-aie/IR/AMDAIEAttrs.cpp.inc"  // IWYU pragma: keeep
+
       >();
 }
 
@@ -84,11 +71,6 @@ namespace mlir::iree_compiler {
 // Helpers for forming `amdaie.packing_config_level` attribute.
 // ===----------------------------------------------------------------------===//
 
-static AMDAIE::PermLevelAttr getPermLevelAttr(
-    MLIRContext *context, ArrayRef<int64_t> permLevelVal) {
-  return AMDAIE::PermLevelAttr::get(context, permLevelVal);
-}
-
 static AMDAIE::PermLevelsAttr getPermLevelsAttr(
     MLIRContext *context, ArrayRef<SmallVector<int64_t>> permLevelsVal) {
   SmallVector<AMDAIE::PermLevelAttr> permLevels;
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp
index 29216d069..76f289cb8 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp
@@ -168,8 +168,8 @@ LogicalResult addAieElfsToCDO(const AMDAIEDeviceModel &deviceModel,
       else
         fileName = "core_" + std::to_string(tileLoc.col) + "_" +
                    std::to_string(tileLoc.row) + ".elf";
-      if (failed(addElfToTile(deviceModel, tileLoc, workDirPath / fileName,
-                              aieSim))) {
+      Path elfPath = workDirPath / fileName;
+      if (failed(addElfToTile(deviceModel, tileLoc, elfPath, aieSim))) {
         return failure();
       }
     }
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
index 17f2e24b8..0cdcd3c58 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
@@ -9,6 +9,7 @@
 #include <filesystem>
 #include <functional>
 #include <random>
+#include <fstream>
 #include <regex>
 #include <sstream>
 #include <unordered_map>
@@ -300,7 +301,7 @@ LogicalResult runTool(
     program = program_ + ".exe";
   }
 #else
-  program = programs_;
+  program = program_;
 #endif  // _WIN32
   if (verbose) {
     llvm::outs() << "\nRun: ";
@@ -413,12 +414,7 @@ static LogicalResult assembleFileUsingChess(
   args.emplace_back("-o");
   args.emplace_back(outputFile);
   std::vector<std::string> env = makeChessEnv(vitisDir, npuVersion);
-  if (failed(runTool(xChessCCExe, args, verbose, env))) {
-    llvm::errs() << "Failed to assemble " << inputFile << " with chess";
-    return failure();
-  }
-
-  return success();
+  return runTool(xChessCCExe, args, verbose, env);
 }
 
 std::vector<std::string> makePeanoOptArgs() {
@@ -516,7 +512,7 @@ static_assert(std::is_same_v<decltype(assembleStringUsingChess),
 
 // Generate the elf files for the core
 static LogicalResult generateCoreElfFiles(
-    AIE::DeviceOp deviceOp, const std::string &objFile, Path tempDir,
+    AIE::DeviceOp deviceOp, const std::string &objFile, Path &tempDir,
     bool useChess, std::optional<Path> vitisDir, const std::string &targetArch,
     bool verbose, Path peanoDir, const std::string &npuVersion,
     const std::optional<std::string> &ukernel) {
@@ -652,6 +648,7 @@ static LogicalResult generateCoreElfFiles(
     // automatically into the ld.lld invocation
     return runTool((peanoDir / "bin" / "clang").string(), flags, verbose);
   }
+  return success();
 }
 
 static LogicalResult generateCDO(MLIRContext *context, AIE::DeviceOp deviceOp,
@@ -660,7 +657,6 @@ static LogicalResult generateCDO(MLIRContext *context, AIE::DeviceOp deviceOp,
                                  const Path &tempDir) {
   auto copy = cast<ModuleOp>(deviceOp.getParentOp()->clone());
   deviceOp = *copy.getOps<AIE::DeviceOp>().begin();
-
   std::string errorMessage;
   PassManager passManager(context, AIE::DeviceOp::getOperationName());
   applyConfigToPassManager(passManager, printIRBeforeAll, printIRAfterAll,
@@ -901,9 +897,11 @@ static LogicalResult generateXCLBin(
   FailureOr<Path> xclbinutilBin =
       findAMDAIETool("iree-aie-xclbinutil", amdAIEInstallDir);
 
-  if (failed(xclbinutilBin)) return xclbinutilBin;
+  if (failed(xclbinutilBin)) return failure();
 
-  if (inputXclbin) {
+  if (!inputXclbin) {
+    flags.insert(flags.end(), {"--add-replace-section", memArg});
+  } else {
     // Create aie_partition.json.
     Path aieInputPartitionJsonFile = tempDir / "aie_input_partition.json";
     std::string inputPartArg =
@@ -955,8 +953,6 @@ static LogicalResult generateXCLBin(
       return failure();
     }
     flags.insert(flags.end(), {"--input", *inputXclbin});
-  } else {
-    flags.insert(flags.end(), {"--add-replace-section", memArg});
   }
   flags.insert(flags.end(), {"--add-kernel", kernelsJsonFile.string(),
                              "--add-replace-section", partArg, "--force",
@@ -1035,8 +1031,8 @@ struct RemoveAlignment2FromLLVMLoadPass
 static LogicalResult generateUnifiedObject(
     MLIRContext *context, AIE::DeviceOp deviceOp, const std::string &outputFile,
     bool printIRBeforeAll, bool printIRAfterAll, bool printIRModuleScope,
-    bool timing, bool useChess, bool verbose, Path tempDir,
-    std::optional<Path> vitisDir, const std::string &targetArch, Path peanoDir,
+    bool timing, bool useChess, bool verbose, Path &tempDir,
+    std::optional<Path> vitisDir, const std::string &targetArch, Path &peanoDir,
     const std::string &npuVersion) {
   assert(deviceOp->getParentOp() && isa<ModuleOp>(deviceOp->getParentOp()) &&
          "DeviceOp must be in a module parent");
@@ -1067,7 +1063,7 @@ static LogicalResult generateUnifiedObject(
   }
 
   llvm::LLVMContext llvmContext;
-  auto llvmModule = translateModuleToLLVMIR(moduleOpcopy, llvmContext);
+  auto llvmModule = translateModuleToLLVMIR(moduleOpCopy, llvmContext);
   if (!llvmModule) {
     llvm::errs() << "Failed to translate module to LLVMIR";
     return failure();
@@ -1085,7 +1081,7 @@ static LogicalResult generateUnifiedObject(
     std::string inputLLChessHackedStr = chesshack(inputLLStr);
     FailureOr<Path> maybeVitisDir = findVitis(vitisDir, npuVersion);
     if (failed(maybeVitisDir)) return failure();
-    FailureOr<std::string> chessIntrinsicsObjFile = assembleStringUsingChess(
+    FailureOr<Path> chessIntrinsicsObjFile = assembleStringUsingChess(
         /*inputFileStr=*/inputLLChessHackedStr,
         /*inputFileName=*/"input.chesshacked.ll",
         /*outputFileName=*/outputFile,
@@ -1198,29 +1194,36 @@ LogicalResult aie2xclbin(
   for (uint32_t w : npuInstructions) output->os() << llvm::format("%08X\n", w);
   output->keep();
 
-  Path unifiedObj = Path(tempDir) / "input.o";
+  Path tempDirPath{tempDir};
+  tempDirPath.make_preferred();
+  Path peanoDirPath{peanoDir};
+  peanoDirPath.make_preferred();
+  std::optional<Path> vitisDirPath{vitisDir};
+  if (vitisDirPath) vitisDirPath->make_preferred();
+
+  Path unifiedObj = tempDirPath / "input.o";
   if (failed(generateUnifiedObject(
           ctx, deviceOp, unifiedObj.string(), printIRBeforeAll, printIRAfterAll,
-          printIRModuleScope, timing, useChess, verbose, tempDir, vitisDir,
-          targetArch, peanoDir, npuVersion))) {
+          printIRModuleScope, timing, useChess, verbose, tempDirPath,
+          vitisDirPath, targetArch, peanoDirPath, npuVersion))) {
     llvm::errs() << "Failed to generate unified object\n";
     return failure();
   }
 
-  if (failed(generateCoreElfFiles(deviceOp, unifiedObj.string(), tempDir,
-                                  useChess, vitisDir, targetArch, verbose,
+  if (failed(generateCoreElfFiles(deviceOp, unifiedObj.string(), tempDirPath,
+                                  useChess, vitisDirPath, targetArch, verbose,
                                   peanoDir, npuVersion, ukernel))) {
     llvm::errs() << "Failed to generate core ELF file(s)\n";
     return failure();
   }
 
   if (failed(generateCDO(ctx, deviceOp, printIRBeforeAll, printIRAfterAll,
-                         printIRModuleScope, timing, tempDir))) {
+                         printIRModuleScope, timing, tempDirPath))) {
     llvm::errs() << "Failed to generate CDO\n";
     return failure();
   }
 
-  if (failed(generateXCLBin(outputXCLBin, tempDir, xclBinKernelID,
+  if (failed(generateXCLBin(outputXCLBin, tempDirPath, xclBinKernelID,
                             xclBinKernelName, xclBinInstanceName,
                             amdAIEInstallDir, verbose, InputXCLBin))) {
     llvm::errs() << "Failed to generate XCLBin\n";
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELocalizeLogicalObjectFifo.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELocalizeLogicalObjectFifo.cpp
index b3b3f8caa..6d7b3f7af 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELocalizeLogicalObjectFifo.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELocalizeLogicalObjectFifo.cpp
@@ -52,11 +52,6 @@ scf::ForallOp getThreadMappedForallAncestor(Operation *op) {
   return getMappedForallAncestor<mlir::gpu::GPUThreadMappingAttr>(op);
 }
 
-scf::ForallOp getThreadOrBlockMappedForallAncestor(Operation *op) {
-  return getMappedForallAncestor<mlir::gpu::GPUThreadMappingAttr,
-                                 mlir::gpu::GPUBlockMappingAttr>(op);
-}
-
 class AMDAIELocalizeLogicalObjectfifoPass
     : public impl::AMDAIELocalizeLogicalObjectfifoBase<
           AMDAIELocalizeLogicalObjectfifoPass> {
diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
index 0e740d1d5..a46e8a729 100644
--- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
+++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
@@ -15,6 +15,9 @@
 #include "iree_aie_router.h"
 #include "iree_aie_runtime.h"
 #include "llvm/ADT/StringExtras.h"
+#ifdef _WIN32
+#include "llvm/Support/Windows/WindowsSupport.h"
+#endif
 
 #define DEBUG_TYPE "iree-aie-cdo-emitter"
 
@@ -210,19 +213,13 @@ LogicalResult pushToBdQueueAndEnable(const AMDAIEDeviceModel &deviceModel,
 }
 
 LogicalResult addElfToTile(const AMDAIEDeviceModel &deviceModel,
-                           const TileLoc &tileLoc, const Path &elfPath,
+                           const TileLoc &tileLoc, Path &elfPath,
                            bool aieSim) {
   auto devInst = const_cast<XAie_DevInst *>(&deviceModel.devInst);
-  // this isn't the case elsewhere but for whatever reason
-  // fopen (what XAie_LoadElf ultimately calls) braeks for >=256
-#ifdef _WIN32
-  if (elfPath.string().size() >= 256) {
-    llvm::errs() << "Windows paths must be less than 256 chars for elf loading "
-                    "to work (seriously):"
-                 << elfPath.string() << "\n";
+  if (!std::filesystem::exists(elfPath)) {
+    llvm::errs() << "elf doesn't exist: " << elfPath.string() << "\n";
     return failure();
   }
-#endif
   TRY_XAIE_API_LOGICAL_RESULT(XAie_LoadElf, devInst, tileLoc,
                               elfPath.string().c_str(),
                               /*loadSym*/ aieSim);
diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h
index 5f7623289..f738cdd07 100644
--- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h
+++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h
@@ -102,7 +102,7 @@ BOTH_OSTREAM_OPS_FORALL_CDO_TYPES(OSTREAM_OP_DECL, BOTH_OSTREAM_OP)
 void initializeCDOGenerator(byte_ordering endianness, bool cdoDebug);
 
 /// Generates one of the aie_cdo*.bins. Takes a callback that makes the actual
-/// calls to aie-rt but envelopes it with a prolog and an epilogue of calls to
+/// calls to aie-rt but envelops it with a prolog and an epilogue of calls to
 /// cdo-driver that:
 ///
 /// 1. Starts the "cdo filestream" (literally just fopens a file)
@@ -113,25 +113,25 @@ void initializeCDOGenerator(byte_ordering endianness, bool cdoDebug);
 ///    CDO, checksum, etc.
 /// 5. Finishes the CDO(fcloses the file)
 ///
-/// Note, all of the cdo APIs are simple and available at
+/// Note, all the cdo APIs are simple and available at
 /// iree-amd-aie/third_party/bootgen/cdo-driver/cdo_driver.c
 LogicalResult generateCDOBinary(const std::filesystem::path &outputPath,
                                 const std::function<LogicalResult()> &cb);
 
 /// "Loads" an elf which will be loaded to the program memory of a tile. Loads
-/// is in quotes because where/how the elf is actaully loaded is determined by
+/// is in quotes because where/how the elf is actually loaded is determined by
 /// the aie-rt backend; the CDO backend copies the elf byte by byte into the
 /// CDO.
 LogicalResult addElfToTile(const AMDAIEDeviceModel &deviceModel,
                            const TileLoc &tileLoc,
-                           const std::filesystem::path &elfPath, bool aieSim);
+                           std::filesystem::path &elfPath, bool aieSim);
 
 /// Turn off and turn it back on again...
 LogicalResult resetUnResetCore(const AMDAIEDeviceModel &deviceModel,
                                const TileLoc &tileLoc);
 
 /// Sets/programs locks with explicit initializers; note initialize here is a
-/// misnomer because "unintialized" locks actually have their counters
+/// misnomer because "uninitialized" locks actually have their counters
 /// initialized to zero anyway by the hardware.
 LogicalResult initializeLock(const AMDAIEDeviceModel &deviceModel,
                              const Lock &lock);
diff --git a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
index 42b8adbd3..0a2f24fe5 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
@@ -92,7 +92,6 @@ iree_status_t iree_hal_xrt_driver_create_internal(
                             "No XRT devices found");
   }
   // Get handle to xrt device
-  std::cerr << xrt::system::enumerate_devices() << "\n";
   try {
     global_device = xrt::device(0);
   } catch (std::runtime_error& e) {
@@ -100,7 +99,7 @@ iree_status_t iree_hal_xrt_driver_create_internal(
                             e.what());
   }
   driver->device = &global_device;
-  *out_driver = (iree_hal_driver_t*)driver;
+  *out_driver = reinterpret_cast<iree_hal_driver_t*>(driver);
   return iree_ok_status();
 }
 

From 26320cc1366ce325ca9e724e638bf7cc35fbbc11 Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Wed, 21 Aug 2024 19:56:01 -0500
Subject: [PATCH 07/28] add E2E Test windows

---
 .github/workflows/ci-windows.yml              | 42 ++++++++++++++++++-
 build_tools/ci/cpu_comparison/run_test.py     | 28 ++++++-------
 cmake/iree_aie_xrt.cmake                      | 13 +++---
 .../AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 14 ++++---
 tests/matmul/requirements.txt                 |  3 +-
 5 files changed, 71 insertions(+), 29 deletions(-)

diff --git a/.github/workflows/ci-windows.yml b/.github/workflows/ci-windows.yml
index c299a73ed..6eec6de5d 100644
--- a/.github/workflows/ci-windows.yml
+++ b/.github/workflows/ci-windows.yml
@@ -51,7 +51,7 @@ jobs:
       - name: Setup Cpp
         uses: aminya/setup-cpp@v1
         with:
-          compiler: msvc
+          compiler: llvm
           vcvarsall: true
           cmake: true
           ninja: true
@@ -104,3 +104,43 @@ jobs:
         with:
           path: ${{ env.CACHE_DIR }}
           key: windows-build-test-cpp-asserts-v1-${{ github.sha }}-${{ github.event.repository.updated_at }}
+
+  test_windows:
+    name: E2E Test windows
+    runs-on: mlevental-win11-pro
+    needs: build_and_ctest
+    strategy:
+      fail-fast: true
+    env:
+      XILINXD_LICENSE_FILE: /home/svcnod/Xilinx.lic
+    steps:
+      - name: "Checking out repository" # for test scripts
+        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
+        with:
+          submodules: false # not required for testbench
+
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: windows_x86_64_release_packages
+
+      - name: Extract artifact
+        run: |
+          mkdir iree-install
+          tar -xf iree-dist-windows.tar -C iree-install
+          bash build_tools/download_peano.sh
+
+      - name: Create venv and install dependencies
+        run: |
+          python -m venv .venv
+          source .venv/Scripts/activate
+          pip install -r tests/matmul/requirements.txt
+
+      - name : E2E comparison of AIE to llvm-cpu
+        run: |
+          source .venv/Scripts/activate
+          python build_tools/ci/cpu_comparison/run_test.py \
+            /c/test_aie_vs_cpu \
+            $PWD/iree-install \
+            $PWD/llvm-aie \
+            /c/Users/maksim/dev_projects/XRT-MCDM/build/WRelease/xilinx/xrt -v
diff --git a/build_tools/ci/cpu_comparison/run_test.py b/build_tools/ci/cpu_comparison/run_test.py
index 0b87b9aa1..63b474efd 100755
--- a/build_tools/ci/cpu_comparison/run_test.py
+++ b/build_tools/ci/cpu_comparison/run_test.py
@@ -232,20 +232,20 @@ class TestConfig:
     """
 
     def __init__(
-            self,
-            output_dir,
-            iree_install_dir,
-            peano_dir,
-            xrt_dir,
-            vitis_dir,
-            file_dir,
-            iree_compile_exe,
-            iree_run_exe,
-            verbose,
-            return_on_fail,
-            reset_npu_between_runs,
-            do_not_run_aie,
-            additional_aie_compilation_flags,
+        self,
+        output_dir,
+        iree_install_dir,
+        peano_dir,
+        xrt_dir,
+        vitis_dir,
+        file_dir,
+        iree_compile_exe,
+        iree_run_exe,
+        verbose,
+        return_on_fail,
+        reset_npu_between_runs,
+        do_not_run_aie,
+        additional_aie_compilation_flags,
     ):
         self.output_dir = output_dir
         self.iree_install_dir = iree_install_dir
diff --git a/cmake/iree_aie_xrt.cmake b/cmake/iree_aie_xrt.cmake
index 99539ebf3..2db2521fa 100644
--- a/cmake/iree_aie_xrt.cmake
+++ b/cmake/iree_aie_xrt.cmake
@@ -185,10 +185,9 @@ foreach(_core_lib IN LISTS _core_libs)
                          $<$<PLATFORM_ID:Windows>:/EHsc /GR>)
   target_link_libraries(${_core_lib} PUBLIC $<BUILD_LOCAL_INTERFACE:${IREE_AIE_BOOST_LIBS}>)
 endforeach()
-if (WIN32)
-  install(
-    TARGETS xrt_coreutil
-    EXPORT IREEExported-Runtime
-    COMPONENT IREETools-Runtime
-    LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR})
-endif()
+
+install(
+  TARGETS xrt_coreutil
+  EXPORT IREEExported-Runtime
+  COMPONENT IREETools-Runtime
+  LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR})
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
index 0cdcd3c58..0799b57f9 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
@@ -12,6 +12,8 @@
 #include <fstream>
 #include <regex>
 #include <sstream>
+// ReSharper disable once CppUnusedIncludeDirective
+#include <fstream>
 #include <unordered_map>
 
 #include "AMDAIETargets.h"
@@ -335,16 +337,16 @@ LogicalResult runTool(
     }
   }
 
+  SmallVector<std::optional<StringRef>> redirects;
+#ifdef _WIN32
+  redirects = {{}, {}, {}};
+  std::optional<ArrayRef<StringRef>> envSmallVec = std::nullopt;
+#else
   std::string temporaryPathStr =
       std::string(temporaryPath.begin(), temporaryPath.size());
   StringRef temporaryPathRef(temporaryPathStr);
   llvm::SmallVector<llvm::StringRef> envSmallVec;
   if (env) envSmallVec.append(env->begin(), env->end());
-
-  SmallVector<std::optional<StringRef>> redirects;
-#ifdef _WIN32
-  redirects = {{}, {}, {}};
-#else
   auto tp = std::optional<StringRef>(temporaryPathRef);
   redirects = {tp, tp, tp};
 #endif
@@ -353,7 +355,7 @@ LogicalResult runTool(
   std::string errMsg;
   sys::ProcessStatistics stats;
   std::optional<sys::ProcessStatistics> optStats(stats);
-  int result = sys::ExecuteAndWait(program, pArgs, std::nullopt,
+  int result = sys::ExecuteAndWait(program, pArgs, envSmallVec,
                                    /* redirects */ redirects,
                                    /*SecondsToWait*/ 10, /*MemoryLimit*/ 0,
                                    &errMsg, &executionFailed, &optStats);
diff --git a/tests/matmul/requirements.txt b/tests/matmul/requirements.txt
index 62116914f..4b216f9a8 100644
--- a/tests/matmul/requirements.txt
+++ b/tests/matmul/requirements.txt
@@ -1,3 +1,4 @@
 PyYAML>=5.4.1
 requests>=2.28.0
-enum_tools==0.6.4
\ No newline at end of file
+enum_tools==0.6.4
+numpy<2
\ No newline at end of file

From 7d48f7979b7558241280d169f5ee94d8907c2eb6 Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Thu, 22 Aug 2024 03:11:02 -0500
Subject: [PATCH 08/28] disable some tests to check linux

---
 .github/workflows/ci-linux.yml | 42 +++-------------------------------
 1 file changed, 3 insertions(+), 39 deletions(-)

diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml
index 4b4ca2a53..7f341e7f1 100644
--- a/.github/workflows/ci-linux.yml
+++ b/.github/workflows/ci-linux.yml
@@ -152,41 +152,6 @@ jobs:
           # on the guthub CI machine.
           sudo prlimit -lunlimited --pid $$
 
-          bash build_tools/ci/run_matmul_test.sh \
-            test_matmuls \
-            iree-install \
-            $PWD/llvm-aie \
-            /opt/xilinx/xrt \
-            /opt/Xilinx/Vitis/2024.2
-
-
-      - name : Smoke E2E comparison flag test
-        run: |
-          source .venv/bin/activate
-          # install requirements
-          # TODO(newling) make requirements.txt file
-          pip install numpy
-          source /opt/xilinx/xrt/setup.sh
-          python3 build_tools/ci/cpu_comparison/run_test.py \
-            test_aie_vs_cpu \
-            iree-install \
-            $PWD/llvm-aie \
-            /opt/xilinx/xrt \
-            /opt/Xilinx/Vitis/2024.2 \
-            --reset_npu_between_runs=0 \
-            --test_set='Smoke' \
-            --do_not_run_aie=1 \
-            --verbose=0
-
-          # Assert that output.log is empty (because verbose=0)
-          if [ -s output.log ]; then
-            echo "output.log is not empty:"
-            cat output.log
-            exit 1
-          else
-            echo "output.log is empty"
-          fi
-
       - name : E2E comparison of AIE to llvm-cpu
         run: |
           source .venv/bin/activate
@@ -196,11 +161,10 @@ jobs:
           source /opt/xilinx/xrt/setup.sh
           python3 build_tools/ci/cpu_comparison/run_test.py \
             test_aie_vs_cpu \
-            iree-install \
+            $PWD/iree-install \
             $PWD/llvm-aie \
-            /opt/xilinx/xrt \
-            /opt/Xilinx/Vitis/2024.2 \
-            --reset-npu-between-runs
+            /opt/xilinx/xrt/lib \
+            --reset-npu-between-runs -v
 
       - name: Printing IR from aie2xclbin
         run: |

From b9bf3b379038f1a83766a4788a5670cb6a3154b6 Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Thu, 22 Aug 2024 16:05:57 -0500
Subject: [PATCH 09/28] refactor run_test and restore ci-linux

---
 .github/workflows/ci-linux.yml                | 44 +++++++++++---
 .github/workflows/ci-windows.yml              | 26 ++++++--
 build_tools/ci/cpu_comparison/run_test.py     | 19 ++++--
 .../print_ir_aie2xclbin.sh                    | 18 ++----
 build_tools/ci/run_matmul_test.sh             | 60 +++++++++++--------
 .../driver/xrt/native_executable.cc           | 11 +++-
 .../src/iree-amd-aie/driver/xrt/xrt_driver.cc | 17 +++---
 7 files changed, 128 insertions(+), 67 deletions(-)

diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml
index 7f341e7f1..ad81ac71e 100644
--- a/.github/workflows/ci-linux.yml
+++ b/.github/workflows/ci-linux.yml
@@ -72,7 +72,8 @@ jobs:
         run: |
           rm -f iree-install/bin/clang*
           rm -f iree-install/bin/llvm-link*
-          tar cf iree-dist-linux.tar -C iree-install . -C ../iree-build tools/testing/e2e/iree-e2e-matmul-test
+          cp ../iree-build/tools/testing/e2e/iree-e2e-matmul-test iree-install/bin
+          tar cf iree-dist-linux.tar -C iree-install .
 
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
@@ -152,26 +153,53 @@ jobs:
           # on the guthub CI machine.
           sudo prlimit -lunlimited --pid $$
 
+          source /opt/xilinx/xrt/setup.sh
+          bash build_tools/ci/run_matmul_test.sh \
+            test_matmuls \
+            iree-install \
+            $PWD/llvm-aie \
+            /opt/xilinx/xrt \
+            /opt/Xilinx/Vitis/2024.2
+
+
+      - name : Smoke E2E comparison flag test
+        run: |
+          source .venv/bin/activate
+          source /opt/xilinx/xrt/setup.sh
+          python3 build_tools/ci/cpu_comparison/run_test.py \
+            test_aie_vs_cpu \
+            iree-install \
+            $PWD/llvm-aie \
+            --xrt-dir /opt/xilinx/xrt \
+            --reset_npu_between_runs=0 \
+            --test_set='Smoke' \
+            --do_not_run_aie=1
+
+          # Assert that output.log is empty (because verbose=0)
+          if [ -s output.log ]; then
+            echo "output.log is not empty:"
+            cat output.log
+            exit 1
+          else
+            echo "output.log is empty"
+          fi
+
       - name : E2E comparison of AIE to llvm-cpu
         run: |
           source .venv/bin/activate
-          # install requirements
-          # TODO(newling) make requirements.txt file
-          pip install numpy
           source /opt/xilinx/xrt/setup.sh
           python3 build_tools/ci/cpu_comparison/run_test.py \
             test_aie_vs_cpu \
             $PWD/iree-install \
             $PWD/llvm-aie \
-            /opt/xilinx/xrt/lib \
+            --xrt-dir /opt/xilinx/xrt \
             --reset-npu-between-runs -v
 
       - name: Printing IR from aie2xclbin
         run: |
           source .venv/bin/activate
+          source /opt/xilinx/xrt/setup.sh
           bash build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh \
             iree-install \
             print_ir_aie2xclbin_results \
-            $PWD/llvm-aie \
-            /opt/xilinx/xrt \
-            /opt/Xilinx/Vitis/2024.2
+            $PWD/llvm-aie
diff --git a/.github/workflows/ci-windows.yml b/.github/workflows/ci-windows.yml
index 6eec6de5d..ad16e54b8 100644
--- a/.github/workflows/ci-windows.yml
+++ b/.github/workflows/ci-windows.yml
@@ -88,7 +88,7 @@ jobs:
         run: |
           rm -f iree-install/bin/clang*
           rm -f iree-install/bin/llvm-link*
-          tar cf iree-dist-windows.tar -C iree-install . -C ../iree-build tools/testing/e2e/iree-e2e-matmul-test.exe
+          tar cf iree-dist-windows.tar -C iree-install .
 
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
@@ -111,8 +111,6 @@ jobs:
     needs: build_and_ctest
     strategy:
       fail-fast: true
-    env:
-      XILINXD_LICENSE_FILE: /home/svcnod/Xilinx.lic
     steps:
       - name: "Checking out repository" # for test scripts
         uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
@@ -136,11 +134,29 @@ jobs:
           source .venv/Scripts/activate
           pip install -r tests/matmul/requirements.txt
 
+      - name: E2E correctness matmul test
+        run: |
+          source .venv/Scripts/activate
+          export XILINX_XRT=/c/Xilinx/XRT
+          bash build_tools/ci/run_matmul_test.sh \
+            /c/test_matmuls \
+            $PWD/iree-install \
+            $PWD/llvm-aie
+
       - name : E2E comparison of AIE to llvm-cpu
         run: |
           source .venv/Scripts/activate
+          export XILINX_XRT=/c/Xilinx/XRT
           python build_tools/ci/cpu_comparison/run_test.py \
             /c/test_aie_vs_cpu \
             $PWD/iree-install \
-            $PWD/llvm-aie \
-            /c/Users/maksim/dev_projects/XRT-MCDM/build/WRelease/xilinx/xrt -v
+            $PWD/llvm-aie -v
+
+      - name: Printing IR from aie2xclbin
+        run: |
+          source .venv/Scripts/activate
+          export XILINX_XRT=/c/Xilinx/XRT
+          bash build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh \
+            $PWD/iree-install \
+            /c/print_ir_aie2xclbin_results \
+            $PWD/llvm-aie
diff --git a/build_tools/ci/cpu_comparison/run_test.py b/build_tools/ci/cpu_comparison/run_test.py
index 63b474efd..8e96809fd 100755
--- a/build_tools/ci/cpu_comparison/run_test.py
+++ b/build_tools/ci/cpu_comparison/run_test.py
@@ -63,6 +63,8 @@ def find_executable(install_dir: Path, executable_name):
 def shell_out(cmd: list, workdir=None, verbose: int = 0, raise_on_error=True, env=None):
     if workdir is None:
         workdir = Path.cwd()
+    workdir = Path(workdir)
+    os.chdir(workdir)
     if not isinstance(cmd, list):
         cmd = [cmd]
     for i, c in enumerate(cmd):
@@ -79,9 +81,16 @@ def shell_out(cmd: list, workdir=None, verbose: int = 0, raise_on_error=True, en
             _cmd = " ".join([f"{k}={v}" for k, v in env.items()]) + " " + _cmd
         print(f"Running the following command:\n{_cmd}")
 
-    handle = subprocess.run(cmd, capture_output=True, cwd=workdir, env=env)
-    stderr_decode = handle.stderr.decode("utf-8").strip()
-    stdout_decode = handle.stdout.decode("utf-8").strip()
+    handle = subprocess.Popen(
+        cmd,
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        env=env,
+    )
+    stdout, stderr = handle.communicate()
+    stderr_decode = stderr.decode("utf-8").strip()
+    stdout_decode = stdout.decode("utf-8").strip()
     if verbose:
         if stdout_decode:
             print("Standard output from script:")
@@ -282,7 +291,7 @@ def __init__(
                 f"verbose must be a boolean or integer, not {type(verbose)}"
             )
 
-        if not get_component_log:
+        if not xrt_dir:
             return
 
         xrt_bin_dir = xrt_dir / "bin"
@@ -796,7 +805,7 @@ def all_tests(
     parser.add_argument("output_dir", type=abs_path)
     parser.add_argument("iree_install_dir", type=abs_path)
     parser.add_argument("peano_install_dir", type=abs_path)
-    parser.add_argument("xrt_dir", type=abs_path)
+    parser.add_argument("--xrt-dir", type=abs_path)
     parser.add_argument("--vitis-dir", type=abs_path)
 
     # TODO(newling) make bool options boolean, not integer (tried but had issues)
diff --git a/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh b/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh
index a0772efe4..a51502a33 100755
--- a/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh
+++ b/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh
@@ -9,11 +9,12 @@
 set -euo pipefail
 
 # Check for the number of provided arguments
-if [ "$#" -ne 2 ] && [ "$#" -ne 5 ]; then
+if [ "$#" -ne 3 ] && [ "$#" -ne 5 ]; then
     echo -e "Illegal number of parameters: $#." \
             "\n For 2 parameters:" \
             "\n     1) <iree-compile-dir>" \
             "\n     2) <output-dir>" \
+            "\n     3) <peano-install-dir>" \
             "\n For 5 parameters:" \
             "\n     1) <iree-compile-dir>" \
             "\n     2) <output-dir>" \
@@ -35,9 +36,9 @@ OUTPUT=`realpath "${2}"`
 mkdir -p ${OUTPUT}
 
 # The CI case:
-if [ "$#" -eq 2 ]; then
-  echo "Assuming that this is the 'CI case' as 2 parameters were provided."
-  PEANO=/opt/llvm-aie
+if [ "$#" -eq 3 ]; then
+  echo "Assuming that this is the 'CI case' as 3 parameters were provided."
+  PEANO="$3"
   XRT=/opt/xilinx/xrt
   VITIS=/opt/Xilinx/Vitis/2024.2
 fi
@@ -47,7 +48,6 @@ echo "xchesscc: $(find $VITIS -name xchesscc)"
 
 # The local set-paths-manually case:
 if [ "$#" -eq 5 ]; then
-  PEANO="$3"
   XRT="$4"
   VITIS="$5"
 fi
@@ -85,16 +85,11 @@ fi
 
 if [ -d "${XRT}" ]; then
   XRT=`realpath "${XRT}"`
-else
-  echo "XRT does not exist: ${XRT}"
-  exit 1
+  source $XRT/setup.sh
 fi
 
 if [ -d "${VITIS}" ]; then
   VITIS=${VITIS}
-else
-  echo "VITIS does not exist: ${VITIS}"
-  exit 1
 fi
 
 # There might be a FileCheck program in the IREE_INSTALL_DIR. Check.
@@ -108,7 +103,6 @@ else
   exit 1
 fi
 
-source $XRT/setup.sh
 
 THIS="$(cd $(dirname $0) && pwd)"
 SOURCE_MLIR_FILE="${THIS}/linalg_matmul_f32.mlir"
diff --git a/build_tools/ci/run_matmul_test.sh b/build_tools/ci/run_matmul_test.sh
index c195f876f..19e90d7bc 100755
--- a/build_tools/ci/run_matmul_test.sh
+++ b/build_tools/ci/run_matmul_test.sh
@@ -65,11 +65,18 @@ fi
 IREE_COMPILE_EXE=""
 TEST_RUNNER=""
 for dir in "${IREE_INSTALL_DIR}" "${IREE_INSTALL_DIR}/bin" "${IREE_INSTALL_DIR}/tools"; do
+  echo "Looking in $dir"
   if [ -f "${dir}/iree-compile" ]; then
     IREE_COMPILE_EXE="${dir}/iree-compile"
   fi
-  if [ -f "${dir}/testing/e2e/iree-e2e-matmul-test" ]; then
-    TEST_RUNNER="${dir}/testing/e2e/iree-e2e-matmul-test"
+  if [ -f "${dir}/iree-compile.exe" ]; then
+    IREE_COMPILE_EXE="${dir}/iree-compile.exe"
+  fi
+  if [ -f "${dir}/iree-e2e-matmul-test" ]; then
+    TEST_RUNNER="${dir}/iree-e2e-matmul-test"
+  fi
+  if [ -f "${dir}/iree-e2e-matmul-test.exe" ]; then
+    TEST_RUNNER="${dir}/iree-e2e-matmul-test.exe"
   fi
 done
 
@@ -101,9 +108,8 @@ if [ -z "${4-}" ]; then
 else
   XRT_DIR=`realpath "$4"`
 fi
-if [ ! -d "${XRT_DIR}" ]; then
-  echo "No directory '${XRT_DIR}' (argument 4) found."
-  exit 1
+if [ -d "$XRT_DIR" ]; then
+  source $XRT_DIR/setup.sh
 fi
 
 # Parameter 5) <vitis-install-dir>
@@ -112,10 +118,6 @@ if [ -z "${5-}" ]; then
 else
   VITIS=`realpath "$5"`
 fi
-if [ ! -d "${VITIS}" ]; then
-  echo "No directory '${VITIS}' (argument 5) found."
-  exit 1
-fi
 
 THIS_DIR="$(cd $(dirname $0) && pwd)"
 ROOT_DIR="$(cd $THIS_DIR/../.. && pwd)"
@@ -127,7 +129,7 @@ if [ ! -f "${GENERATOR}" ]; then
   exit 1
 fi
 
-IREE_PYTHON3_EXECUTABLE="${IREE_PYTHON3_EXECUTABLE:-python3}"
+IREE_PYTHON3_EXECUTABLE="${IREE_PYTHON3_EXECUTABLE:-python}"
 if [ -z "$IREE_PYTHON3_EXECUTABLE" ]; then
   echo "IREE_PYTHON3_EXECUTABLE is not set."
   exit 1
@@ -137,7 +139,6 @@ fi
 
 GITHUB_ACTIONS="${GITHUB_ACTIONS:-false}"
 
-source $XRT_DIR/setup.sh
 # Circumvent xclbin security (no longer needed as of April 2024 XDNA driver)
 export XRT_HACK_UNSECURE_LOADING_XCLBIN=1
 
@@ -398,7 +399,6 @@ function run_matmul_test() {
                       --iree-amd-aie-show-invoked-commands"
 
   if [ $use_ukernel -ne 0 ]; then
-
     compilation_flags="${compilation_flags} \
                         --iree-amdaie-enable-ukernels=all"
   fi
@@ -527,12 +527,14 @@ run_matmul_test \
     --use_ukernel "0" \
     --num_repeat_runs "2"
 
-run_matmul_test \
-    --name_prefix "ukern" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "256"  --k "256" --n "256" \
-    --use_ukernel "1"
+if [ -d "$VITIS" ]; then
+  run_matmul_test \
+      --name_prefix "ukern" \
+      --lhs_rhs_type "bf16" \
+      --acc_type "f32" \
+      --m "256"  --k "256" --n "256" \
+      --use_ukernel "1"
+fi
 
 # Disabled until the following issue is resolved:
 # https://github.com/Xilinx/llvm-aie/issues/102
@@ -866,14 +868,15 @@ run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \
     --acc_type "i32" \
     --num_repeat_runs "2"
 
-run_matmul_test_on_shapes ${bf16_ukernel_shapes_small[@]} \
-    --name_prefix "small" \
-    --lower_to_aie_pipeline "objectFifo" \
-    --tile_pipeline "pack-peel" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --num_repeat_runs "2" \
-    --use_ukernel "1"
+if [ -d "$VITIS" ]; then
+  run_matmul_test_on_shapes ${bf16_ukernel_shapes_small[@]} \
+      --name_prefix "small" \
+      --lower_to_aie_pipeline "objectFifo" \
+      --tile_pipeline "pack-peel" \
+      --lhs_rhs_type "bf16" \
+      --acc_type "f32" \
+      --num_repeat_runs "2" \
+      --use_ukernel "1"
 
 run_matmul_test_on_shapes ${bf16_ukernel_shapes_medium[@]} \
     --name_prefix "medium" \
@@ -883,11 +886,14 @@ run_matmul_test_on_shapes ${bf16_ukernel_shapes_medium[@]} \
     --acc_type "f32" \
     --num_repeat_runs "2" \
     --use_ukernel "1"
+fi
 
 ###################################################################
 # Chess tests
 ###################################################################
 
+if [ -d "$VITIS" ]; then
+
 run_matmul_test \
     --name_prefix "chess_i32_matmul" \
     --lhs_rhs_type "i32" \
@@ -909,6 +915,8 @@ run_matmul_test \
     --num_repeat_runs "10" \
     --use_ukernel "1"
 
+fi
+
 if [ $MATMUL_TESTS_FAILS -ne 0 ]; then
   echo "$MATMUL_TESTS_FAILS matmul tests failed! Scroll up and look for the 🦄 and 🐞..."
   exit 1
diff --git a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
index c5da9f217..cd7c7137f 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
@@ -184,14 +184,19 @@ iree_status_t iree_hal_xrt_native_executable_create(
       return iree_make_status(IREE_STATUS_INTERNAL, "XCLBIN load error: %s",
                               e.what());
     }
-    device->register_xclbin(*xclbin);
     try {
-      xrt::hw_context context(*device, xclbin->get_uuid());
+      device->register_xclbin(*xclbin);
+    } catch (std::runtime_error& e) {
+      return iree_make_status(IREE_STATUS_INTERNAL, "XCLBIN register error: %s",
+                              e.what());
+    }
+    xrt::hw_context context;
+    try {
+      context = {*device, xclbin->get_uuid()};
     } catch (std::runtime_error& e) {
       return iree_make_status(IREE_STATUS_INTERNAL,
                               "xrt::hw_context context: %s", e.what());
     }
-    xrt::hw_context context(*device, xclbin->get_uuid());
     uint32_t asm_instr_index =
         flatbuffers_uint32_vec_at(asm_instr_indices_vec, entry_ordinal);
     iree_amd_aie_hal_xrt_AsmInstDef_table_t asminst_def =
diff --git a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
index 0a2f24fe5..25cdddd50 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
@@ -55,12 +55,6 @@ static iree_hal_xrt_driver_t* iree_hal_xrt_driver_cast(
   return (iree_hal_xrt_driver_t*)base_value;
 }
 
-static const iree_hal_xrt_driver_t* iree_hal_xrt_driver_const_cast(
-    const iree_hal_driver_t* base_value) {
-  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_xrt_driver_vtable);
-  return (const iree_hal_xrt_driver_t*)base_value;
-}
-
 static iree_status_t iree_hal_xrt_device_check_params(
     const iree_hal_xrt_device_params_t* params) {
   if (params->arena_block_size < 4096) {
@@ -86,7 +80,14 @@ iree_status_t iree_hal_xrt_driver_create_internal(
       (char*)driver + iree_sizeof_struct(*driver));
   driver->device_params = *device_params;
 
-  int device_count = xrt::system::enumerate_devices();
+  int device_count;
+  try {
+    device_count = xrt::system::enumerate_devices();
+  } catch (std::runtime_error& e) {
+    return iree_make_status(IREE_STATUS_INTERNAL,
+                            "xrt::system::enumerate_devices failed: %s",
+                            e.what());
+  }
   if (IREE_UNLIKELY(device_count == 0)) {
     return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
                             "No XRT devices found");
@@ -94,11 +95,11 @@ iree_status_t iree_hal_xrt_driver_create_internal(
   // Get handle to xrt device
   try {
     global_device = xrt::device(0);
+    driver->device = &global_device;
   } catch (std::runtime_error& e) {
     return iree_make_status(IREE_STATUS_INTERNAL, "xrt::device(0) failed: %s",
                             e.what());
   }
-  driver->device = &global_device;
   *out_driver = reinterpret_cast<iree_hal_driver_t*>(driver);
   return iree_ok_status();
 }

From b4f0838f8a593b8434ebe9144c22953ce6873ffe Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Thu, 22 Aug 2024 17:21:28 -0500
Subject: [PATCH 10/28] undo temp peano hack

---
 build_tools/download_peano.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build_tools/download_peano.sh b/build_tools/download_peano.sh
index 8c20a7560..70c8693f5 100644
--- a/build_tools/download_peano.sh
+++ b/build_tools/download_peano.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
 
-RELEASE=19.0.0.2024081918+69415c19
+RELEASE=19.0.0.2024082221+90abe71b
 pip download llvm_aie==$RELEASE -f https://github.com/Xilinx/llvm-aie/releases/expanded_assets/nightly
 unzip llvm_aie*whl

From a78deabe55dc55e7da1e005be55a6ad26e488109 Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Thu, 22 Aug 2024 17:24:29 -0500
Subject: [PATCH 11/28] undo more hacks

---
 .../print_ir_aie2xclbin.sh                    |  1 -
 build_tools/ci/run_matmul_test.sh             | 54 +++++++++----------
 2 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh b/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh
index a51502a33..edc70df08 100755
--- a/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh
+++ b/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh
@@ -103,7 +103,6 @@ else
   exit 1
 fi
 
-
 THIS="$(cd $(dirname $0) && pwd)"
 SOURCE_MLIR_FILE="${THIS}/linalg_matmul_f32.mlir"
 
diff --git a/build_tools/ci/run_matmul_test.sh b/build_tools/ci/run_matmul_test.sh
index 19e90d7bc..290ca7c60 100755
--- a/build_tools/ci/run_matmul_test.sh
+++ b/build_tools/ci/run_matmul_test.sh
@@ -878,14 +878,14 @@ if [ -d "$VITIS" ]; then
       --num_repeat_runs "2" \
       --use_ukernel "1"
 
-run_matmul_test_on_shapes ${bf16_ukernel_shapes_medium[@]} \
-    --name_prefix "medium" \
-    --lower_to_aie_pipeline "objectFifo" \
-    --tile_pipeline "pack-peel" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --num_repeat_runs "2" \
-    --use_ukernel "1"
+  run_matmul_test_on_shapes ${bf16_ukernel_shapes_medium[@]} \
+      --name_prefix "medium" \
+      --lower_to_aie_pipeline "objectFifo" \
+      --tile_pipeline "pack-peel" \
+      --lhs_rhs_type "bf16" \
+      --acc_type "f32" \
+      --num_repeat_runs "2" \
+      --use_ukernel "1"
 fi
 
 ###################################################################
@@ -894,26 +894,26 @@ fi
 
 if [ -d "$VITIS" ]; then
 
-run_matmul_test \
-    --name_prefix "chess_i32_matmul" \
-    --lhs_rhs_type "i32" \
-    --acc_type "i32" \
-    --m "32" \
-    --n "32" \
-    --k "32" \
-    --use_chess "1" \
-    --num_repeat_runs "10"
+  run_matmul_test \
+      --name_prefix "chess_i32_matmul" \
+      --lhs_rhs_type "i32" \
+      --acc_type "i32" \
+      --m "32" \
+      --n "32" \
+      --k "32" \
+      --use_chess "1" \
+      --num_repeat_runs "10"
 
-run_matmul_test \
-    --name_prefix "chess_bf16_ukernel" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "64" \
-    --n "64" \
-    --k "64" \
-    --use_chess "1" \
-    --num_repeat_runs "10" \
-    --use_ukernel "1"
+  run_matmul_test \
+      --name_prefix "chess_bf16_ukernel" \
+      --lhs_rhs_type "bf16" \
+      --acc_type "f32" \
+      --m "64" \
+      --n "64" \
+      --k "64" \
+      --use_chess "1" \
+      --num_repeat_runs "10" \
+      --use_ukernel "1"
 
 fi
 

From aca8816669656592a8a4e3ee4c4d750b10583e20 Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Thu, 22 Aug 2024 17:28:57 -0500
Subject: [PATCH 12/28] undo even more hacks

---
 .../target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp     |  1 -
 .../target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp   |  2 +-
 .../src/iree-amd-aie/aie_runtime/iree_aie_configure.cc |  6 +-----
 runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc      | 10 ++++------
 tests/matmul/requirements.txt                          |  2 +-
 5 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp
index ff5ca0da8..a769a3e7c 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp
@@ -59,7 +59,6 @@ void AMDAIEDialect::initializeAMDAIEAttrs() {
   addAttributes<
 #define GET_ATTRDEF_LIST
 #include "iree-amd-aie/IR/AMDAIEAttrs.cpp.inc"  // IWYU pragma: keeep
-
       >();
 }
 
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
index 0799b57f9..506ba795b 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
@@ -357,7 +357,7 @@ LogicalResult runTool(
   std::optional<sys::ProcessStatistics> optStats(stats);
   int result = sys::ExecuteAndWait(program, pArgs, envSmallVec,
                                    /* redirects */ redirects,
-                                   /*SecondsToWait*/ 10, /*MemoryLimit*/ 0,
+                                   /*SecondsToWait*/ 0, /*MemoryLimit*/ 0,
                                    &errMsg, &executionFailed, &optStats);
 
 #ifndef _WIN32
diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
index a46e8a729..6706eed68 100644
--- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
+++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
@@ -15,9 +15,6 @@
 #include "iree_aie_router.h"
 #include "iree_aie_runtime.h"
 #include "llvm/ADT/StringExtras.h"
-#ifdef _WIN32
-#include "llvm/Support/Windows/WindowsSupport.h"
-#endif
 
 #define DEBUG_TYPE "iree-aie-cdo-emitter"
 
@@ -213,8 +210,7 @@ LogicalResult pushToBdQueueAndEnable(const AMDAIEDeviceModel &deviceModel,
 }
 
 LogicalResult addElfToTile(const AMDAIEDeviceModel &deviceModel,
-                           const TileLoc &tileLoc, Path &elfPath,
-                           bool aieSim) {
+                           const TileLoc &tileLoc, Path &elfPath, bool aieSim) {
   auto devInst = const_cast<XAie_DevInst *>(&deviceModel.devInst);
   if (!std::filesystem::exists(elfPath)) {
     llvm::errs() << "elf doesn't exist: " << elfPath.string() << "\n";
diff --git a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
index 25cdddd50..e51c10bac 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
@@ -80,18 +80,16 @@ iree_status_t iree_hal_xrt_driver_create_internal(
       (char*)driver + iree_sizeof_struct(*driver));
   driver->device_params = *device_params;
 
-  int device_count;
   try {
-    device_count = xrt::system::enumerate_devices();
+    if (IREE_UNLIKELY(xrt::system::enumerate_devices() == 0)) {
+      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                              "No XRT devices found");
+    }
   } catch (std::runtime_error& e) {
     return iree_make_status(IREE_STATUS_INTERNAL,
                             "xrt::system::enumerate_devices failed: %s",
                             e.what());
   }
-  if (IREE_UNLIKELY(device_count == 0)) {
-    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
-                            "No XRT devices found");
-  }
   // Get handle to xrt device
   try {
     global_device = xrt::device(0);
diff --git a/tests/matmul/requirements.txt b/tests/matmul/requirements.txt
index 4b216f9a8..3f48cf4a4 100644
--- a/tests/matmul/requirements.txt
+++ b/tests/matmul/requirements.txt
@@ -1,4 +1,4 @@
 PyYAML>=5.4.1
 requests>=2.28.0
 enum_tools==0.6.4
-numpy<2
\ No newline at end of file
+numpy<2

From 9fb6823dc30eea5c4c1b1d8648f87928117cbbe9 Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Thu, 22 Aug 2024 17:44:45 -0500
Subject: [PATCH 13/28] copy stuff in build_test_cpp.sh

---
 .github/workflows/ci-linux.yml    | 3 ---
 .github/workflows/ci-windows.yml  | 2 --
 build_tools/ci/build_test_cpp.sh  | 4 ++++
 build_tools/ci/run_matmul_test.sh | 2 +-
 4 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml
index ad81ac71e..2cbf6f540 100644
--- a/.github/workflows/ci-linux.yml
+++ b/.github/workflows/ci-linux.yml
@@ -70,9 +70,6 @@ jobs:
       - name: Create artifacts
         if: ${{ !cancelled() }}
         run: |
-          rm -f iree-install/bin/clang*
-          rm -f iree-install/bin/llvm-link*
-          cp ../iree-build/tools/testing/e2e/iree-e2e-matmul-test iree-install/bin
           tar cf iree-dist-linux.tar -C iree-install .
 
       - name: Upload artifacts
diff --git a/.github/workflows/ci-windows.yml b/.github/workflows/ci-windows.yml
index ad16e54b8..f33b64246 100644
--- a/.github/workflows/ci-windows.yml
+++ b/.github/workflows/ci-windows.yml
@@ -86,8 +86,6 @@ jobs:
       - name: Create artifacts
         if: ${{ !cancelled() }}
         run: |
-          rm -f iree-install/bin/clang*
-          rm -f iree-install/bin/llvm-link*
           tar cf iree-dist-windows.tar -C iree-install .
 
       - name: Upload artifacts
diff --git a/build_tools/ci/build_test_cpp.sh b/build_tools/ci/build_test_cpp.sh
index f8fa87ba4..0d2fa2bea 100644
--- a/build_tools/ci/build_test_cpp.sh
+++ b/build_tools/ci/build_test_cpp.sh
@@ -116,3 +116,7 @@ fi
 
 # Show ccache stats.
 ccache --show-stats
+
+rm -f "$install_dir"/bin/clang*
+rm -f "$install_dir"/bin/llvm-link*
+cp "$build_dir"/tools/testing/e2e/iree-e2e-matmul-test "$install_dir"/bin
diff --git a/build_tools/ci/run_matmul_test.sh b/build_tools/ci/run_matmul_test.sh
index 290ca7c60..01fdff83b 100755
--- a/build_tools/ci/run_matmul_test.sh
+++ b/build_tools/ci/run_matmul_test.sh
@@ -450,7 +450,7 @@ function run_matmul_test() {
   echo "**** Running '${name}' matmul test ${total_num_runs} times (command ${COMMAND}) ****"
   for i in $(seq 1 $num_repeat_runs); do
     # Only reset NPU in CI to facilitate easier local testing without sudo access.
-    if [ "${GITHUB_ACTIONS}" = true ]; then
+    if [[ "$OSTYPE" == "linux-gnu"* ]] && [ "${GITHUB_ACTIONS}" = true ]; then
       echo "Reset NPU"
       bash $THIS_DIR/reset_npu.sh
     fi

From 29808939287f5d776c87d2c3b9f42081ae7a6d1c Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Thu, 22 Aug 2024 18:24:40 -0500
Subject: [PATCH 14/28] use latest driver

---
 build_tools/ci/run_matmul_test.sh | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/build_tools/ci/run_matmul_test.sh b/build_tools/ci/run_matmul_test.sh
index 01fdff83b..dc46740cc 100755
--- a/build_tools/ci/run_matmul_test.sh
+++ b/build_tools/ci/run_matmul_test.sh
@@ -95,7 +95,9 @@ fi
 if [ -z "${3-}" ]; then
   PEANO=/opt/llvm-aie
 else
-  PEANO=`realpath "$3"`
+  if [ -d "$PEANO" ]; then
+    PEANO=`realpath "$3"`
+  fi
 fi
 if [ ! -d "${PEANO}" ]; then
   echo "No directory '${PEANO}' (argument 3) found."
@@ -106,7 +108,9 @@ fi
 if [ -z "${4-}" ]; then
   XRT_DIR=/opt/xilinx/xrt
 else
-  XRT_DIR=`realpath "$4"`
+  if [ -d "$XRT_DIR" ]; then
+    XRT_DIR=`realpath "$4"`
+  fi
 fi
 if [ -d "$XRT_DIR" ]; then
   source $XRT_DIR/setup.sh
@@ -116,7 +120,9 @@ fi
 if [ -z "${5-}" ]; then
   VITIS=/opt/Xilinx/Vitis/2024.2
 else
-  VITIS=`realpath "$5"`
+  if [ -d "$VITIS" ]; then
+    VITIS=`realpath "$5"`
+  fi
 fi
 
 THIS_DIR="$(cd $(dirname $0) && pwd)"

From 4113c916871dcf5187eb215f5c2ab9298140bf07 Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Thu, 22 Aug 2024 19:28:23 -0500
Subject: [PATCH 15/28] Update run_matmul_test.sh

---
 build_tools/ci/run_matmul_test.sh | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/build_tools/ci/run_matmul_test.sh b/build_tools/ci/run_matmul_test.sh
index dc46740cc..01fdff83b 100755
--- a/build_tools/ci/run_matmul_test.sh
+++ b/build_tools/ci/run_matmul_test.sh
@@ -95,9 +95,7 @@ fi
 if [ -z "${3-}" ]; then
   PEANO=/opt/llvm-aie
 else
-  if [ -d "$PEANO" ]; then
-    PEANO=`realpath "$3"`
-  fi
+  PEANO=`realpath "$3"`
 fi
 if [ ! -d "${PEANO}" ]; then
   echo "No directory '${PEANO}' (argument 3) found."
@@ -108,9 +106,7 @@ fi
 if [ -z "${4-}" ]; then
   XRT_DIR=/opt/xilinx/xrt
 else
-  if [ -d "$XRT_DIR" ]; then
-    XRT_DIR=`realpath "$4"`
-  fi
+  XRT_DIR=`realpath "$4"`
 fi
 if [ -d "$XRT_DIR" ]; then
   source $XRT_DIR/setup.sh
@@ -120,9 +116,7 @@ fi
 if [ -z "${5-}" ]; then
   VITIS=/opt/Xilinx/Vitis/2024.2
 else
-  if [ -d "$VITIS" ]; then
-    VITIS=`realpath "$5"`
-  fi
+  VITIS=`realpath "$5"`
 fi
 
 THIS_DIR="$(cd $(dirname $0) && pwd)"

From 766d4297c0d295cca8a11aefc4ba8a6b81b1430d Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Thu, 22 Aug 2024 19:58:18 -0500
Subject: [PATCH 16/28] hardcode npu1 for now (different pcie id)

---
 .github/workflows/ci-linux.yml                |  5 ++--
 build_tools/ci/run_matmul_test.sh             | 26 ++++++++++++++-----
 .../AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp |  1 -
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml
index 2cbf6f540..943fb0218 100644
--- a/.github/workflows/ci-linux.yml
+++ b/.github/workflows/ci-linux.yml
@@ -168,9 +168,8 @@ jobs:
             iree-install \
             $PWD/llvm-aie \
             --xrt-dir /opt/xilinx/xrt \
-            --reset_npu_between_runs=0 \
-            --test_set='Smoke' \
-            --do_not_run_aie=1
+            --test-set='Smoke' \
+            --do-not-run-aie
 
           # Assert that output.log is empty (because verbose=0)
           if [ -s output.log ]; then
diff --git a/build_tools/ci/run_matmul_test.sh b/build_tools/ci/run_matmul_test.sh
index 01fdff83b..c461e9002 100755
--- a/build_tools/ci/run_matmul_test.sh
+++ b/build_tools/ci/run_matmul_test.sh
@@ -144,6 +144,7 @@ export XRT_HACK_UNSECURE_LOADING_XCLBIN=1
 
 cd ${OUTPUT_DIR}
 
+export MATMUL_TESTS_RUN=0
 export MATMUL_TESTS_FAILS=0
 
 ###############################################################################
@@ -353,6 +354,8 @@ function run_matmul_test() {
     exit 1
   fi
 
+  export MATMUL_TESTS_RUN=$(( $MATMUL_TESTS_RUN+1 ))
+
   # Re-enable exit on failure:
   set -e
 
@@ -403,14 +406,14 @@ function run_matmul_test() {
                         --iree-amdaie-enable-ukernels=all"
   fi
 
+  set +e
+
   echo "**** Generating matmul .vmfb file for ${name} ****"
   ${IREE_COMPILE_EXE} "${matmul_ir}" \
     ${compilation_flags} -o "${matmul_vmfb}"
 
-
   compileResult=$?
 
-
   # Handle cases other than when compilation is expected to, and does, succeed:
   if [ $expect_compile_failure -ne 0 ]; then
     if [ $compileResult -ne 0 ]; then
@@ -418,22 +421,30 @@ function run_matmul_test() {
       return 0
     else
       echo "Expected compilation failure, got compilation success."
-      exit 1
+      export MATMUL_TESTS_FAILS=$(( $MATMUL_TESTS_FAILS+1 ))
+      return
     fi
   else
     if [ $compileResult -ne 0 ]; then
       echo "Expected compilation success, got compilation failure."
-      exit 1
+      export MATMUL_TESTS_FAILS=$(( $MATMUL_TESTS_FAILS+1 ))
+      return
     fi
   fi
 
   # Renable exit on failure:
-  set -e
   echo "**** Generating calls .vmfb file for ${name} ****"
   ${IREE_COMPILE_EXE} "${calls_ir}" \
       --iree-hal-target-backends=${target_backend} \
       -o "${calls_vmfb}"
 
+  return_status=$?
+  if [ $return_status -ne 0 ]; then
+    echo "'${name}' matmul compile failed!"
+    export MATMUL_TESTS_FAILS=$(( $MATMUL_TESTS_FAILS+1 ))
+    return
+  fi
+
   compiled_time=$(date +%s%3N)
 
   echo "**** Running '${name}' matmul tests ****"
@@ -444,8 +455,6 @@ function run_matmul_test() {
       --device=${device} \
       --max_elements_to_check=${max_elements_to_check}"
 
-  set +e
-
   total_num_runs=$(( num_repeat_runs * num_corruption_repeat_runs))
   echo "**** Running '${name}' matmul test ${total_num_runs} times (command ${COMMAND}) ****"
   for i in $(seq 1 $num_repeat_runs); do
@@ -917,6 +926,9 @@ if [ -d "$VITIS" ]; then
 
 fi
 
+echo "\n\n"
+
+echo "$MATMUL_TESTS_RUN matmul tests run!"
 if [ $MATMUL_TESTS_FAILS -ne 0 ]; then
   echo "$MATMUL_TESTS_FAILS matmul tests failed! Scroll up and look for the 🦄 and 🐞..."
   exit 1
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
index 506ba795b..057a68061 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
@@ -9,7 +9,6 @@
 #include <filesystem>
 #include <functional>
 #include <random>
-#include <fstream>
 #include <regex>
 #include <sstream>
 // ReSharper disable once CppUnusedIncludeDirective

From 4220c51306056c342466e79ae71dadc02a15e0ad Mon Sep 17 00:00:00 2001
From: makslevental <maksim.levental@gmail.com>
Date: Fri, 23 Aug 2024 10:30:03 -0500
Subject: [PATCH 17/28] try disable some tests

---
 build_tools/ci/cpu_comparison/run_test.py                | 8 +++++---
 runtime/src/iree-amd-aie/driver/xrt/native_executable.cc | 6 ++++--
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/build_tools/ci/cpu_comparison/run_test.py b/build_tools/ci/cpu_comparison/run_test.py
index 8e96809fd..95d30a77b 100755
--- a/build_tools/ci/cpu_comparison/run_test.py
+++ b/build_tools/ci/cpu_comparison/run_test.py
@@ -521,6 +521,7 @@ def aie_vs_llvm_cpu(
         return
 
     name = name_from_mlir_filename(test_file)
+    print(f"Running {name} test")
 
     input_args = generate_inputs(test_file, config.output_dir, seed)
 
@@ -653,9 +654,10 @@ def run(self, config):
         generate_matmul_test(
             test_name, template_name, 1024, 1024, 512, "bf16", "f32"
         )
-        aie_vs_llvm_cpu(
-            config, test_name, tile_pipeline="pack-peel", use_ukernel=True
-        )
+        if config.vitis_dir:
+            aie_vs_llvm_cpu(
+                config, test_name, tile_pipeline="pack-peel", use_ukernel=True
+            )
         aie_vs_llvm_cpu(
             config, test_name, tile_pipeline="pack-peel", use_ukernel=False
         )
diff --git a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
index cd7c7137f..19668c34d 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
@@ -279,8 +279,10 @@ static void iree_hal_xrt_native_executable_destroy(
 
   for (iree_host_size_t i = 0; i < executable->entry_point_count; ++i) {
     try {
-      // delete executable->entry_points[i].kernel;
-      // delete executable->entry_points[i].instr;
+#ifndef _WIN32
+      delete executable->entry_points[i].kernel;
+      delete executable->entry_points[i].instr;
+#endif
       // TODO(jornt): deleting the xclbin here will result in a corrupted size
       // error in XRT. It looks like the xclbin needs to stay alive while the
       // device is alive if it has been registered.

From 22a53cd51a7216e5b03879c6cdee1d479e76db7b Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Fri, 23 Aug 2024 19:22:26 -0500
Subject: [PATCH 18/28] cleanup

---
 .../target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp        | 4 +---
 runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc  | 3 ++-
 runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h   | 2 +-
 runtime/src/iree-amd-aie/driver/xrt/native_executable.cc    | 6 +++---
 runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc           | 2 +-
 5 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
index 057a68061..2877c0e54 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
@@ -288,9 +288,6 @@ bool hasEnding(std::string const &fullString, std::string const &ending) {
   return false;
 }
 
-// Returns either:
-//  -- the output of running the tool, if run without failure, or
-//  -- an empty optional, if the tool fails to run.
 LogicalResult runTool(
     const std::string &program_, const std::vector<std::string> &args,
     bool verbose, std::optional<std::vector<std::string>> env = std::nullopt) {
@@ -339,6 +336,7 @@ LogicalResult runTool(
   SmallVector<std::optional<StringRef>> redirects;
 #ifdef _WIN32
   redirects = {{}, {}, {}};
+  // Explicit type but this never actually constructs an ArrayRef
   std::optional<ArrayRef<StringRef>> envSmallVec = std::nullopt;
 #else
   std::string temporaryPathStr =
diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
index 6706eed68..b59f7b725 100644
--- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
+++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
@@ -210,7 +210,8 @@ LogicalResult pushToBdQueueAndEnable(const AMDAIEDeviceModel &deviceModel,
 }
 
 LogicalResult addElfToTile(const AMDAIEDeviceModel &deviceModel,
-                           const TileLoc &tileLoc, Path &elfPath, bool aieSim) {
+                           const TileLoc &tileLoc, const Path &elfPath,
+                           bool aieSim) {
   auto devInst = const_cast<XAie_DevInst *>(&deviceModel.devInst);
   if (!std::filesystem::exists(elfPath)) {
     llvm::errs() << "elf doesn't exist: " << elfPath.string() << "\n";
diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h
index f738cdd07..94aabf6d4 100644
--- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h
+++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h
@@ -124,7 +124,7 @@ LogicalResult generateCDOBinary(const std::filesystem::path &outputPath,
 /// CDO.
 LogicalResult addElfToTile(const AMDAIEDeviceModel &deviceModel,
                            const TileLoc &tileLoc,
-                           std::filesystem::path &elfPath, bool aieSim);
+                           const std::filesystem::path &elfPath, bool aieSim);
 
 /// Turn off and turn it back on again...
 LogicalResult resetUnResetCore(const AMDAIEDeviceModel &deviceModel,
diff --git a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
index 19668c34d..df6f60903 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
@@ -180,20 +180,20 @@ iree_status_t iree_hal_xrt_native_executable_create(
     std::unique_ptr<xrt::xclbin> xclbin;
     try {
       xclbin = std::make_unique<xrt::xclbin>(xclbinVector);
-    } catch (std::runtime_error& e) {
+    } catch (std::exception& e) {
       return iree_make_status(IREE_STATUS_INTERNAL, "XCLBIN load error: %s",
                               e.what());
     }
     try {
       device->register_xclbin(*xclbin);
-    } catch (std::runtime_error& e) {
+    } catch (std::exception& e) {
       return iree_make_status(IREE_STATUS_INTERNAL, "XCLBIN register error: %s",
                               e.what());
     }
     xrt::hw_context context;
     try {
       context = {*device, xclbin->get_uuid()};
-    } catch (std::runtime_error& e) {
+    } catch (std::exception& e) {
       return iree_make_status(IREE_STATUS_INTERNAL,
                               "xrt::hw_context context: %s", e.what());
     }
diff --git a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
index e51c10bac..3c9effd98 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
@@ -126,8 +126,8 @@ static void iree_hal_xrt_driver_destroy(iree_hal_driver_t* base_driver) {
   iree_allocator_free(host_allocator, driver);
 
   IREE_TRACE_ZONE_END(z0);
-  return;
 }
+
 static iree_status_t iree_hal_xrt_driver_dump_device_info(
     iree_hal_driver_t* base_driver, iree_hal_device_id_t device_id,
     iree_string_builder_t* builder) {

From 8317bf9b272a261f38cf2e011892ccd28d8dfecf Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Fri, 23 Aug 2024 19:59:29 -0500
Subject: [PATCH 19/28] fix mistake (hopefully)

---
 build_tools/ci/cpu_comparison/run_test.py            |  2 +-
 .../target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp |  4 ++--
 .../iree-amd-aie/aie_runtime/iree_aie_configure.cc   |  3 +--
 .../iree-amd-aie/aie_runtime/iree_aie_configure.h    |  2 +-
 .../src/iree-amd-aie/driver/xrt/native_executable.cc | 12 +++++-------
 runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc    |  2 +-
 6 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/build_tools/ci/cpu_comparison/run_test.py b/build_tools/ci/cpu_comparison/run_test.py
index 95d30a77b..a3e630e33 100755
--- a/build_tools/ci/cpu_comparison/run_test.py
+++ b/build_tools/ci/cpu_comparison/run_test.py
@@ -628,7 +628,7 @@ def run(self, config):
         generate_matmul_test(test_name, template_name, 128, 128, 256, "i32", "i32")
         aie_vs_llvm_cpu(config, test_name, tile_pipeline="pack-peel", rtol=0, atol=0)
 
-        if config.xdna_datetime and config.xdna_datetime < 20240819:
+        if config.xdna_datetime and config.xdna_datetime < 20240801:
             for name in [
                 "two_matmul_switching",
                 "matmul_f32_8_8_4",
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
index 2877c0e54..78928d358 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
@@ -1203,8 +1203,8 @@ LogicalResult aie2xclbin(
   Path unifiedObj = tempDirPath / "input.o";
   if (failed(generateUnifiedObject(
           ctx, deviceOp, unifiedObj.string(), printIRBeforeAll, printIRAfterAll,
-          printIRModuleScope, timing, useChess, verbose, tempDirPath,
-          vitisDirPath, targetArch, peanoDirPath, npuVersion))) {
+          printIRModuleScope, timing, useChess, verbose, tempDirPath, vitisDirPath,
+          targetArch, peanoDirPath, npuVersion))) {
     llvm::errs() << "Failed to generate unified object\n";
     return failure();
   }
diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
index b59f7b725..6706eed68 100644
--- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
+++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
@@ -210,8 +210,7 @@ LogicalResult pushToBdQueueAndEnable(const AMDAIEDeviceModel &deviceModel,
 }
 
 LogicalResult addElfToTile(const AMDAIEDeviceModel &deviceModel,
-                           const TileLoc &tileLoc, const Path &elfPath,
-                           bool aieSim) {
+                           const TileLoc &tileLoc, Path &elfPath, bool aieSim) {
   auto devInst = const_cast<XAie_DevInst *>(&deviceModel.devInst);
   if (!std::filesystem::exists(elfPath)) {
     llvm::errs() << "elf doesn't exist: " << elfPath.string() << "\n";
diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h
index 94aabf6d4..f738cdd07 100644
--- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h
+++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h
@@ -124,7 +124,7 @@ LogicalResult generateCDOBinary(const std::filesystem::path &outputPath,
 /// CDO.
 LogicalResult addElfToTile(const AMDAIEDeviceModel &deviceModel,
                            const TileLoc &tileLoc,
-                           const std::filesystem::path &elfPath, bool aieSim);
+                           std::filesystem::path &elfPath, bool aieSim);
 
 /// Turn off and turn it back on again...
 LogicalResult resetUnResetCore(const AMDAIEDeviceModel &deviceModel,
diff --git a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
index df6f60903..cd7c7137f 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
@@ -180,20 +180,20 @@ iree_status_t iree_hal_xrt_native_executable_create(
     std::unique_ptr<xrt::xclbin> xclbin;
     try {
       xclbin = std::make_unique<xrt::xclbin>(xclbinVector);
-    } catch (std::exception& e) {
+    } catch (std::runtime_error& e) {
       return iree_make_status(IREE_STATUS_INTERNAL, "XCLBIN load error: %s",
                               e.what());
     }
     try {
       device->register_xclbin(*xclbin);
-    } catch (std::exception& e) {
+    } catch (std::runtime_error& e) {
       return iree_make_status(IREE_STATUS_INTERNAL, "XCLBIN register error: %s",
                               e.what());
     }
     xrt::hw_context context;
     try {
       context = {*device, xclbin->get_uuid()};
-    } catch (std::exception& e) {
+    } catch (std::runtime_error& e) {
       return iree_make_status(IREE_STATUS_INTERNAL,
                               "xrt::hw_context context: %s", e.what());
     }
@@ -279,10 +279,8 @@ static void iree_hal_xrt_native_executable_destroy(
 
   for (iree_host_size_t i = 0; i < executable->entry_point_count; ++i) {
     try {
-#ifndef _WIN32
-      delete executable->entry_points[i].kernel;
-      delete executable->entry_points[i].instr;
-#endif
+      // delete executable->entry_points[i].kernel;
+      // delete executable->entry_points[i].instr;
       // TODO(jornt): deleting the xclbin here will result in a corrupted size
       // error in XRT. It looks like the xclbin needs to stay alive while the
       // device is alive if it has been registered.
diff --git a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
index 3c9effd98..e51c10bac 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
@@ -126,8 +126,8 @@ static void iree_hal_xrt_driver_destroy(iree_hal_driver_t* base_driver) {
   iree_allocator_free(host_allocator, driver);
 
   IREE_TRACE_ZONE_END(z0);
+  return;
 }
-
 static iree_status_t iree_hal_xrt_driver_dump_device_info(
     iree_hal_driver_t* base_driver, iree_hal_device_id_t device_id,
     iree_string_builder_t* builder) {

From 9fffbc09b5d949c0d9f66c33a4046f95567b56a7 Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Fri, 23 Aug 2024 20:02:08 -0500
Subject: [PATCH 20/28] incorporate comments

---
 runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc | 3 ++-
 runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h  | 2 +-
 runtime/src/iree-amd-aie/driver/xrt/native_executable.cc   | 6 +++---
 runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc          | 2 +-
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
index 6706eed68..b59f7b725 100644
--- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
+++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc
@@ -210,7 +210,8 @@ LogicalResult pushToBdQueueAndEnable(const AMDAIEDeviceModel &deviceModel,
 }
 
 LogicalResult addElfToTile(const AMDAIEDeviceModel &deviceModel,
-                           const TileLoc &tileLoc, Path &elfPath, bool aieSim) {
+                           const TileLoc &tileLoc, const Path &elfPath,
+                           bool aieSim) {
   auto devInst = const_cast<XAie_DevInst *>(&deviceModel.devInst);
   if (!std::filesystem::exists(elfPath)) {
     llvm::errs() << "elf doesn't exist: " << elfPath.string() << "\n";
diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h
index f738cdd07..94aabf6d4 100644
--- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h
+++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h
@@ -124,7 +124,7 @@ LogicalResult generateCDOBinary(const std::filesystem::path &outputPath,
 /// CDO.
 LogicalResult addElfToTile(const AMDAIEDeviceModel &deviceModel,
                            const TileLoc &tileLoc,
-                           std::filesystem::path &elfPath, bool aieSim);
+                           const std::filesystem::path &elfPath, bool aieSim);
 
 /// Turn off and turn it back on again...
 LogicalResult resetUnResetCore(const AMDAIEDeviceModel &deviceModel,
diff --git a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
index cd7c7137f..f4f990ab2 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
@@ -180,20 +180,20 @@ iree_status_t iree_hal_xrt_native_executable_create(
     std::unique_ptr<xrt::xclbin> xclbin;
     try {
       xclbin = std::make_unique<xrt::xclbin>(xclbinVector);
-    } catch (std::runtime_error& e) {
+    } catch (std::exception& e) {
       return iree_make_status(IREE_STATUS_INTERNAL, "XCLBIN load error: %s",
                               e.what());
     }
     try {
       device->register_xclbin(*xclbin);
-    } catch (std::runtime_error& e) {
+    } catch (std::exception& e) {
       return iree_make_status(IREE_STATUS_INTERNAL, "XCLBIN register error: %s",
                               e.what());
     }
     xrt::hw_context context;
     try {
       context = {*device, xclbin->get_uuid()};
-    } catch (std::runtime_error& e) {
+    } catch (std::exception& e) {
       return iree_make_status(IREE_STATUS_INTERNAL,
                               "xrt::hw_context context: %s", e.what());
     }
diff --git a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
index e51c10bac..3c9effd98 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
@@ -126,8 +126,8 @@ static void iree_hal_xrt_driver_destroy(iree_hal_driver_t* base_driver) {
   iree_allocator_free(host_allocator, driver);
 
   IREE_TRACE_ZONE_END(z0);
-  return;
 }
+
 static iree_status_t iree_hal_xrt_driver_dump_device_info(
     iree_hal_driver_t* base_driver, iree_hal_device_id_t device_id,
     iree_string_builder_t* builder) {

From 489fec70e8bbc8f685349eb287c60a76c5af267c Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Fri, 23 Aug 2024 21:30:49 -0500
Subject: [PATCH 21/28] xfail 1536 medium test on windows

---
 build_tools/ci/cpu_comparison/run_test.py | 1 +
 build_tools/ci/run_matmul_test.sh         | 7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/build_tools/ci/cpu_comparison/run_test.py b/build_tools/ci/cpu_comparison/run_test.py
index a3e630e33..3ff4a05a5 100755
--- a/build_tools/ci/cpu_comparison/run_test.py
+++ b/build_tools/ci/cpu_comparison/run_test.py
@@ -266,6 +266,7 @@ def __init__(
         self.iree_run_exe = iree_run_exe
         self.return_on_fail = return_on_fail
         self.verbose = verbose
+        self.xdna_datetime = None
         self.reset_npu_between_runs = reset_npu_between_runs
         self.do_not_run_aie = do_not_run_aie
         self.additional_aie_compilation_flags = additional_aie_compilation_flags
diff --git a/build_tools/ci/run_matmul_test.sh b/build_tools/ci/run_matmul_test.sh
index c461e9002..dfa76d44c 100755
--- a/build_tools/ci/run_matmul_test.sh
+++ b/build_tools/ci/run_matmul_test.sh
@@ -812,6 +812,13 @@ run_matmul_test_on_shapes ${i32_shapes_small[@]} \
     --acc_type "i32" \
     --num_repeat_runs "10"
 
+i32_shapes_medium=(
+  '1024x1024x1024'
+)
+if [ "$OSTYPE" != "msys" ]; then
+  i32_shapes_medium+=('1536x2048x1536')
+fi
+
 run_matmul_test_on_shapes ${i32_shapes_medium[@]} \
     --name_prefix "medium" \
     --lower_to_aie_pipeline "objectFifo" \

From 612d5d0210062eaaa600d35edaee22ffd8bf367d Mon Sep 17 00:00:00 2001
From: makslevental <maksim.levental@gmail.com>
Date: Sat, 24 Aug 2024 12:20:36 -0500
Subject: [PATCH 22/28] remove dead code

---
 runtime/src/iree-amd-aie/driver/xrt/xrt_device.cc | 7 -------
 runtime/src/iree-amd-aie/driver/xrt/xrt_device.h  | 4 ----
 2 files changed, 11 deletions(-)

diff --git a/runtime/src/iree-amd-aie/driver/xrt/xrt_device.cc b/runtime/src/iree-amd-aie/driver/xrt/xrt_device.cc
index 5e01954f3..0a2a199c1 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/xrt_device.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/xrt_device.cc
@@ -58,13 +58,6 @@ void iree_hal_xrt_device_params_initialize(
   out_params->arena_block_size = 32 * 1024;
 }
 
-const iree_hal_xrt_device_params_t* iree_hal_xrt_device_params(
-    const iree_hal_device_t* base_device) {
-  const iree_hal_xrt_device_t* device =
-      iree_hal_xrt_device_const_cast(base_device);
-  return &device->params;
-}
-
 static iree_status_t iree_hal_xrt_device_create_internal(
     iree_string_view_t identifier, xrt::device *xrt_device,
     const iree_hal_xrt_device_params_t* params, iree_allocator_t host_allocator,
diff --git a/runtime/src/iree-amd-aie/driver/xrt/xrt_device.h b/runtime/src/iree-amd-aie/driver/xrt/xrt_device.h
index 7c887610c..e55db9962 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/xrt_device.h
+++ b/runtime/src/iree-amd-aie/driver/xrt/xrt_device.h
@@ -31,10 +31,6 @@ iree_status_t iree_hal_xrt_device_create(
     xrt::device* device, iree_allocator_t host_allocator,
     iree_hal_device_t** out_device);
 
-// Returns the parameters used for creating the device.
-const iree_hal_xrt_device_params_t* iree_hal_xrt_device_params(
-    const iree_hal_device_t* device);
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus

From f37162931cca7a7cc297257053200c42044fcbb8 Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Sat, 24 Aug 2024 10:53:48 -0700
Subject: [PATCH 23/28] Update ci-linux.yml

remove linux-phoenix-20240606
---
 .github/workflows/ci-linux.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml
index 943fb0218..a7b2c4694 100644
--- a/.github/workflows/ci-linux.yml
+++ b/.github/workflows/ci-linux.yml
@@ -93,7 +93,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        runs-on: [linux-phoenix-20240606, linux-phoenix-20240819]
+        runs-on: [linux-phoenix-20240819]
     runs-on: ${{ matrix.runs-on }}
     env:
       XILINXD_LICENSE_FILE: /opt/xilinx/Xilinx.lic

From f60f9f039926130c72b6a4b679dea7384d7fd6b1 Mon Sep 17 00:00:00 2001
From: makslevental <maksim.levental@gmail.com>
Date: Mon, 26 Aug 2024 12:33:34 -0500
Subject: [PATCH 24/28] address my own comments

---
 .github/workflows/ci-linux.yml                     |  1 +
 build_tools/ci/build_test_cpp.sh                   |  4 ++--
 .../iree-amd-aie/Target/AMDAIETargetCDODirect.cpp  |  4 ++--
 .../AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp      | 14 ++++----------
 4 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml
index a7b2c4694..a3a0bbe57 100644
--- a/.github/workflows/ci-linux.yml
+++ b/.github/workflows/ci-linux.yml
@@ -189,6 +189,7 @@ jobs:
             $PWD/iree-install \
             $PWD/llvm-aie \
             --xrt-dir /opt/xilinx/xrt \
+            --vitis-dir /opt/Xilinx/Vitis/2024.2 \
             --reset-npu-between-runs -v
 
       - name: Printing IR from aie2xclbin
diff --git a/build_tools/ci/build_test_cpp.sh b/build_tools/ci/build_test_cpp.sh
index 0d2fa2bea..3e98d4c40 100644
--- a/build_tools/ci/build_test_cpp.sh
+++ b/build_tools/ci/build_test_cpp.sh
@@ -32,7 +32,7 @@ if [[ "$OSTYPE" == "linux-gnu"* ]]; then
   export CMAKE_TOOLCHAIN_FILE="$this_dir/linux_default_toolchain.cmake"
   export CC=clang
   export CXX=clang++
-else
+elif [[ "$OSTYPE" == "msys"* ]]; then
   export CC=clang-cl.exe
   export CXX=clang-cl.exe
 fi
@@ -109,7 +109,7 @@ if [[ "$OSTYPE" == "linux-gnu"* ]]; then
   ctest --test-dir "$build_dir" -R amd-aie --output-on-failure -j
 elif [[ "$OSTYPE" == "darwin"* ]]; then
   ctest --test-dir "$build_dir" -R amd-aie -E "pack_peel_pipeline_matmul|conv_fill_spec_pad" --output-on-failure -j --repeat until-pass:5
-else
+elif [[ "$OSTYPE" == "msys"* ]]; then
   # hack while windows is flaky to get past failing tests
   ctest --test-dir "$build_dir" -R amd-aie --output-on-failure -j --repeat until-pass:5
 fi
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp
index 76f289cb8..29216d069 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp
@@ -168,8 +168,8 @@ LogicalResult addAieElfsToCDO(const AMDAIEDeviceModel &deviceModel,
       else
         fileName = "core_" + std::to_string(tileLoc.col) + "_" +
                    std::to_string(tileLoc.row) + ".elf";
-      Path elfPath = workDirPath / fileName;
-      if (failed(addElfToTile(deviceModel, tileLoc, elfPath, aieSim))) {
+      if (failed(addElfToTile(deviceModel, tileLoc, workDirPath / fileName,
+                              aieSim))) {
         return failure();
       }
     }
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
index 78928d358..17549f214 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
@@ -291,15 +291,9 @@ bool hasEnding(std::string const &fullString, std::string const &ending) {
 LogicalResult runTool(
     const std::string &program_, const std::vector<std::string> &args,
     bool verbose, std::optional<std::vector<std::string>> env = std::nullopt) {
-  std::string program;
+  std::string program = program_;
 #if defined(_WIN32)
-  if (hasEnding(program_, ".exe")) {
-    program = program_;
-  } else {
-    program = program_ + ".exe";
-  }
-#else
-  program = program_;
+  if (!hasEnding(program_, ".exe")) program = program_ + ".exe";
 #endif  // _WIN32
   if (verbose) {
     llvm::outs() << "\nRun: ";
@@ -1203,8 +1197,8 @@ LogicalResult aie2xclbin(
   Path unifiedObj = tempDirPath / "input.o";
   if (failed(generateUnifiedObject(
           ctx, deviceOp, unifiedObj.string(), printIRBeforeAll, printIRAfterAll,
-          printIRModuleScope, timing, useChess, verbose, tempDirPath, vitisDirPath,
-          targetArch, peanoDirPath, npuVersion))) {
+          printIRModuleScope, timing, useChess, verbose, tempDirPath,
+          vitisDirPath, targetArch, peanoDirPath, npuVersion))) {
     llvm::errs() << "Failed to generate unified object\n";
     return failure();
   }

From 6dd3e50a2420c63cb6badf58119e91032d80d334 Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Mon, 26 Aug 2024 13:33:45 -0500
Subject: [PATCH 25/28] fix darwin cxx

---
 build_tools/ci/build_test_cpp.sh | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/build_tools/ci/build_test_cpp.sh b/build_tools/ci/build_test_cpp.sh
index 3e98d4c40..7886b856b 100644
--- a/build_tools/ci/build_test_cpp.sh
+++ b/build_tools/ci/build_test_cpp.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-set -eu -o errtrace
+set -eux -o errtrace
 
 this_dir="$(cd $(dirname $0) && pwd)"
 repo_root="$(cd $this_dir/../.. && pwd)"
@@ -60,19 +60,10 @@ echo '{
 
 cd $iree_dir
 CMAKE_ARGS="\
-  -S $iree_dir \
-  -B $build_dir \
   -GNinja \
   -DCMAKE_BUILD_TYPE=Release \
   -DCMAKE_INSTALL_PREFIX=$install_dir \
   -DCMAKE_INSTALL_LIBDIR=lib \
-  -DCMAKE_EXE_LINKER_FLAGS_INIT="-fuse-ld=lld" \
-  -DCMAKE_SHARED_LINKER_FLAGS_INIT="-fuse-ld=lld" \
-  -DCMAKE_MODULE_LINKER_FLAGS_INIT="-fuse-ld=lld" \
-  -DCMAKE_C_COMPILER="${CC}" \
-  -DCMAKE_CXX_COMPILER="${CXX}" \
-  -DLLVM_TARGET_ARCH=X86 \
-  -DLLVM_TARGETS_TO_BUILD=X86 \
   -DIREE_ENABLE_ASSERTIONS=ON \
   -DIREE_BUILD_SAMPLES=OFF \
   -DIREE_BUILD_PYTHON_BINDINGS=ON \
@@ -89,11 +80,21 @@ CMAKE_ARGS="\
   -DIREE_CMAKE_PLUGIN_PATHS=$PWD/../iree-amd-aie"
 
 if [[ "$OSTYPE" != "darwin"* ]]; then
-  CMAKE_ARGS="$CMAKE_ARGS -DIREE_EXTERNAL_HAL_DRIVERS=xrt"
+  cmake $CMAKE_ARGS \
+    -DCMAKE_EXE_LINKER_FLAGS_INIT="-fuse-ld=lld" \
+    -DCMAKE_SHARED_LINKER_FLAGS_INIT="-fuse-ld=lld" \
+    -DCMAKE_MODULE_LINKER_FLAGS_INIT="-fuse-ld=lld" \
+    -DCMAKE_C_COMPILER="${CC}" \
+    -DCMAKE_CXX_COMPILER="${CXX}" \
+    -DLLVM_TARGET_ARCH=X86 \
+    -DLLVM_TARGETS_TO_BUILD=X86 \
+    -DIREE_EXTERNAL_HAL_DRIVERS=xrt \
+    -S $iree_dir -B $build_dir
+else
+  cmake $CMAKE_ARGS \
+    -S $iree_dir -B $build_dir
 fi
 
-cmake $CMAKE_ARGS
-
 echo "Building all"
 echo "------------"
 cmake --build "$build_dir" -- -k 0

From 5c7704eca395231a3700378612b52f614e97da3f Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Wed, 28 Aug 2024 18:07:54 -0500
Subject: [PATCH 26/28] rename runner

---
 .github/workflows/ci-windows.yml | 2 +-
 build_tools/ci/build_test_cpp.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci-windows.yml b/.github/workflows/ci-windows.yml
index f33b64246..4804d14eb 100644
--- a/.github/workflows/ci-windows.yml
+++ b/.github/workflows/ci-windows.yml
@@ -105,7 +105,7 @@ jobs:
 
   test_windows:
     name: E2E Test windows
-    runs-on: mlevental-win11-pro
+    runs-on: windows-phoenix
     needs: build_and_ctest
     strategy:
       fail-fast: true
diff --git a/build_tools/ci/build_test_cpp.sh b/build_tools/ci/build_test_cpp.sh
index 7886b856b..65c61332a 100644
--- a/build_tools/ci/build_test_cpp.sh
+++ b/build_tools/ci/build_test_cpp.sh
@@ -77,7 +77,7 @@ CMAKE_ARGS="\
   -DIREE_INPUT_STABLEHLO=OFF \
   -DIREE_INPUT_TORCH=OFF \
   -DCMAKE_OBJECT_PATH_MAX=4096 \
-  -DIREE_CMAKE_PLUGIN_PATHS=$PWD/../iree-amd-aie"
+  -DIREE_CMAKE_PLUGIN_PATHS=$repo_root"
 
 if [[ "$OSTYPE" != "darwin"* ]]; then
   cmake $CMAKE_ARGS \

From 627f7e1934ddb667e390b87db9022f115d98723e Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Wed, 28 Aug 2024 22:10:37 -0500
Subject: [PATCH 27/28] fix after rebase

---
 .github/workflows/ci-linux.yml                             | 2 +-
 .../target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp       | 5 ++++-
 runtime/src/iree-amd-aie/driver/xrt/native_executable.cc   | 7 +++++--
 runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc          | 4 ++--
 4 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml
index a3a0bbe57..100ea0eca 100644
--- a/.github/workflows/ci-linux.yml
+++ b/.github/workflows/ci-linux.yml
@@ -93,7 +93,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        runs-on: [linux-phoenix-20240819]
+        runs-on: [linux-phoenix]
     runs-on: ${{ matrix.runs-on }}
     env:
       XILINXD_LICENSE_FILE: /opt/xilinx/Xilinx.lic
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
index 17549f214..455416f2d 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
@@ -639,7 +639,10 @@ static LogicalResult generateCoreElfFiles(
     if (verbose) flags.emplace_back("-v");
     // we run clang (ie cc) so that libc, libm, crt0/1 paths are injected
     // automatically into the ld.lld invocation
-    return runTool((peanoDir / "bin" / "clang").string(), flags, verbose);
+    if (failed(
+            runTool((peanoDir / "bin" / "clang").string(), flags, verbose))) {
+      return failure();
+    }
   }
   return success();
 }
diff --git a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
index f4f990ab2..582a789fa 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
@@ -279,8 +279,11 @@ static void iree_hal_xrt_native_executable_destroy(
 
   for (iree_host_size_t i = 0; i < executable->entry_point_count; ++i) {
     try {
-      // delete executable->entry_points[i].kernel;
-      // delete executable->entry_points[i].instr;
+#ifndef _WIN32
+      // Deleting these causes a segmentation fault on Windows, so skip there.
+      delete executable->entry_points[i].kernel;
+      delete executable->entry_points[i].instr;
+#endif
       // TODO(jornt): deleting the xclbin here will result in a corrupted size
       // error in XRT. It looks like the xclbin needs to stay alive while the
       // device is alive if it has been registered.
diff --git a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
index 3c9effd98..6dd8feb0a 100644
--- a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc
@@ -85,7 +85,7 @@ iree_status_t iree_hal_xrt_driver_create_internal(
       return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
                               "No XRT devices found");
     }
-  } catch (std::runtime_error& e) {
+  } catch (std::exception& e) {
     return iree_make_status(IREE_STATUS_INTERNAL,
                             "xrt::system::enumerate_devices failed: %s",
                             e.what());
@@ -94,7 +94,7 @@ iree_status_t iree_hal_xrt_driver_create_internal(
   try {
     global_device = xrt::device(0);
     driver->device = &global_device;
-  } catch (std::runtime_error& e) {
+  } catch (std::exception& e) {
     return iree_make_status(IREE_STATUS_INTERNAL, "xrt::device(0) failed: %s",
                             e.what());
   }

From cac317d73c7f6fd98f8469122c820ca32e12d4a9 Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental@gmail.com>
Date: Wed, 28 Aug 2024 22:45:18 -0500
Subject: [PATCH 28/28] disable aie-rt warnings

---
 cmake/iree_aie_bootgen.cmake | 21 ++-------------------
 cmake/iree_aie_rt.cmake      |  4 +---
 cmake/iree_aie_xrt.cmake     |  8 ++++----
 3 files changed, 7 insertions(+), 26 deletions(-)

diff --git a/cmake/iree_aie_bootgen.cmake b/cmake/iree_aie_bootgen.cmake
index 11238be4c..a834c9427 100644
--- a/cmake/iree_aie_bootgen.cmake
+++ b/cmake/iree_aie_bootgen.cmake
@@ -30,25 +30,8 @@ if(WIN32)
   target_compile_definitions(iree-aie-bootgen PUBLIC YY_NO_UNISTD_H)
 endif()
 if(CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU")
-  set(_bootgen_c_warning_ignores
-      -Wno-cast-qual
-      -Wno-covered-switch-default
-      -Wno-date-time
-      -Wno-deprecated-declarations
-      -Wno-deprecated-register
-      -Wno-dynamic-class-memaccess
-      -Wno-format
-      -Wno-implicit-fallthrough
-      -Wno-incompatible-function-pointer-types
-      -Wno-incompatible-pointer-types-discards-qualifiers
-      -Wno-misleading-indentation
-      -Wno-pointer-bool-conversion
-      -Wno-sign-compare
-      -Wno-tautological-overlap-compare
-      -Wno-unused)
-  set(_bootgen_cxx_warning_ignores
-      -Wno-deprecated-copy -Wno-non-virtual-dtor -Wno-overloaded-virtual
-      -Wno-register -Wno-reorder -Wno-suggest-override)
+  set(_bootgen_c_warning_ignores -w)
+  set(_bootgen_cxx_warning_ignores -w -Wno-register)
 endif()
 
 target_compile_options(iree-aie-bootgen PRIVATE
diff --git a/cmake/iree_aie_rt.cmake b/cmake/iree_aie_rt.cmake
index 96b9791d2..dcdb7c204 100644
--- a/cmake/iree_aie_rt.cmake
+++ b/cmake/iree_aie_rt.cmake
@@ -254,9 +254,7 @@ set_target_properties(
   PROPERTIES COMPILE_OPTIONS "${_aie_runtime_compile_options}")
 target_compile_definitions(xaiengine PRIVATE ${XAIE_DEBUG} __AIECDO__ XAIE_FEATURE_ALL)
 if(CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU")
-  set(xaiengine_c_warning_ignores
-      -Wno-unused-but-set-variable
-      -Wno-incompatible-pointer-types)
+  set(xaiengine_c_warning_ignores -w)
   target_compile_options(xaiengine PRIVATE ${xaiengine_c_warning_ignores})
 endif()
 # For <elf.h>
diff --git a/cmake/iree_aie_xrt.cmake b/cmake/iree_aie_xrt.cmake
index 2db2521fa..9606573f8 100644
--- a/cmake/iree_aie_xrt.cmake
+++ b/cmake/iree_aie_xrt.cmake
@@ -130,8 +130,8 @@ target_include_directories(iree-aie-xclbinutil
                                    ${_xclbinutil_source_dir})
 target_compile_options(iree-aie-xclbinutil
                        PRIVATE
-                       $<$<PLATFORM_ID:Linux>:-fexceptions -frtti>
-                       $<$<PLATFORM_ID:Windows>:/EHsc /GR>)
+                       $<$<PLATFORM_ID:Linux>:-fexceptions -frtti -w>
+                       $<$<PLATFORM_ID:Windows>:/EHsc /GR /w>)
 set_target_properties(iree-aie-xclbinutil
                       PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/tools")
 
@@ -181,8 +181,8 @@ foreach(_core_lib IN LISTS _core_libs)
   target_compile_definitions(${_core_lib} PUBLIC -DBOOST_BIND_GLOBAL_PLACEHOLDERS)
   target_compile_options(${_core_lib}
                          PRIVATE
-                         $<$<PLATFORM_ID:Linux>:-fexceptions -frtti>
-                         $<$<PLATFORM_ID:Windows>:/EHsc /GR>)
+                         $<$<PLATFORM_ID:Linux>:-fexceptions -frtti -w>
+                         $<$<PLATFORM_ID:Windows>:/EHsc /GR /w>)
   target_link_libraries(${_core_lib} PUBLIC $<BUILD_LOCAL_INTERFACE:${IREE_AIE_BOOST_LIBS}>)
 endforeach()