intel
diff --git a/build_tools/patches/fa_support.patch b/build_tools/patches/fa_support.patch
Lines changed: 52 additions & 0 deletions
diff --git a/lib/ExecutionEngine/ImexRunnerUtils.cpp b/lib/ExecutionEngine/ImexRunnerUtils.cpp
Lines changed: 15 additions & 2 deletions
@@ -0,0 +1,52 @@
+diff --git a/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp b/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp
+index 9ead1d89069d..3822d24c8579 100644
+--- a/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp
++++ b/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp
+@@ -189,9 +189,10 @@ class CreateNdDescToXeVMPattern
+     // If source is a memref, we need to extract the aligned pointer as index.
+     // Pointer type is passed as i32 or i64 by type converter.
+     if (sourceMemrefTy) {
+-      if (!sourceMemrefTy.hasStaticShape()) {
+-        return rewriter.notifyMatchFailure(op, "Expected static memref shape.");
+-      }
++      // if (!sourceMemrefTy.hasStaticShape()) {
++      //   return rewriter.notifyMatchFailure(op, "Expected static memref
++      //   shape.");
++      // }
+       baseAddr =
+           memref::ExtractAlignedPointerAsIndexOp::create(rewriter, loc, source);
+     } else {
+diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
+index e95338f7d18b..2615d225dc1d 100644
+--- a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
++++ b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
+@@ -348,6 +349,9 @@ private:
+ /// d1) and return vector<16x2x64>
+ static VectorType getDistributedType(VectorType originalType, AffineMap map,
+                                      int64_t warpSize) {
++  // If the map has zero results, that means no distribution.
++  if (map.getNumResults() == 0)
++    return originalType;
+   SmallVector<int64_t> targetShape(originalType.getShape());
+   for (unsigned i = 0, e = map.getNumResults(); i < e; i++) {
+     unsigned position = map.getDimPosition(i);
+diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
+index f1dbc5ddb202..3023c65d4bc3 100644
+--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
++++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
+@@ -1506,9 +1506,14 @@ void XeGPUSubgroupDistributePass::runOnOperation() {
+     if (!layout)
+       return AffineMap::getMultiDimMapWithTargets(
+           vecRank, {static_cast<unsigned int>(vecRank - 1)}, val.getContext());
++    // Expecting layout and vector rank to match.
++    assert(layout.getRank() == vecRank &&
++           "vector rank and layout rank must match");
++    // A dimension is distributed if its layout value is > 1 and the dimension
++    // size is evenly divisible by the layout value.
+     SmallVector<unsigned int> distributedDims;
+     for (auto [i, v] : llvm::enumerate(layout.getEffectiveLaneLayoutAsInt())) {
+-      if (v > 1)
++      if (v > 1 && vecType.getShape()[i] % v == 0)
+         distributedDims.push_back(i);
+     }
+     return AffineMap::getMultiDimMapWithTargets(vecRank, distributedDims,
@@ -19,6 +19,7 @@
 #include <cstdlib>
 #include <cstring>
 #include <iostream>
+#include <limits>
 #include <random>
 
 // NOLINTBEGIN(*-identifier-naming)
@@ -223,21 +224,33 @@ void _mlir_ciface_printMaxError(UnrankedMemRefType<T> *M,
   std::pair<double, DynamicMemRefIterator<T>> max_rel_err_idx{0.0, DM.begin()};
   std::pair<double, DynamicMemRefIterator<T>> max_abs_err_idx{0.0, DM.begin()};
   uint64_t idx = 0;
+  double max_rel_error_i = std::numeric_limits<double>::infinity(),
+         max_rel_error_j = std::numeric_limits<double>::infinity();
+  double max_abs_error_i = std::numeric_limits<double>::infinity(),
+         max_abs_error_j = std::numeric_limits<double>::infinity();
   for (; i != DM.end() && j != DN.end(); ++i, ++j, ++idx) {
     const double i_val = getFloat(*i);
     const double j_val = getFloat(*j);
     const double delta = fabs(i_val - j_val);
     const double rel_error = delta / fmax(fabs(i_val), fabs(j_val));
-    if (delta > max_abs_err_idx.first)
+    if (delta > max_abs_err_idx.first) {
       max_abs_err_idx = {delta, i};
-    if (rel_error > max_rel_err_idx.first)
+      max_abs_error_i = i_val;
+      max_abs_error_j = j_val;
+    }
+    if (rel_error > max_rel_err_idx.first) {
       max_rel_err_idx = {rel_error, i};
+      max_rel_error_i = i_val;
+      max_rel_error_j = j_val;
+    }
   }
   std::cout << "Max absolute error " << max_abs_err_idx.first
             << " at idx=" << std::distance(DM.begin(), max_abs_err_idx.second)
+            << " (i=" << max_abs_error_i << ", j=" << max_abs_error_j << ")"
             << '\n';
   std::cout << "Max relative error " << max_rel_err_idx.first
             << " at idx=" << std::distance(DM.begin(), max_rel_err_idx.second)
+            << " (i=" << max_rel_error_i << ", j=" << max_rel_error_j << ")"
             << '\n';
 }