From e2110ae18c8e3a4ceafa51d14638a6b4519d18e2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Fri, 13 Sep 2024 10:12:14 -0700
Subject: [PATCH] Integrate LLVM at llvm/llvm-project@f0b3287297ae

Updates LLVM usage to match [f0b3287297ae](https://github.com/llvm/llvm-project/commit/f0b3287297ae)

PiperOrigin-RevId: 674342494
---
 third_party/llvm/generated.patch              |  27 +
 third_party/llvm/workspace.bzl                |   4 +-
 third_party/shardy/temporary.patch            | 524 ++----------------
 third_party/shardy/workspace.bzl              |   4 +-
 .../tsl/third_party/llvm/generated.patch      |  27 +
 .../tsl/third_party/llvm/workspace.bzl        |   4 +-
 6 files changed, 98 insertions(+), 492 deletions(-)

diff --git a/third_party/llvm/generated.patch b/third_party/llvm/generated.patch
index 509398da979e8..ed6047cd2b8f2 100644
--- a/third_party/llvm/generated.patch
+++ b/third_party/llvm/generated.patch
@@ -1 +1,28 @@
 Auto generated patch. Do not edit or delete it, even if empty.
+diff -ruN --strip-trailing-cr a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c
+--- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c
++++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c
+@@ -1,4 +1,4 @@
+-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -Wall -Werror -verify
++// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-512 -Wall -Werror -verify
+
+ #include
+ #include
+diff -ruN --strip-trailing-cr a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c
+--- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c
++++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c
+@@ -1,4 +1,4 @@
+-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -Wall -Werror -verify
++// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 -Wall -Werror -verify
+
+ #include
+ #include
+diff -ruN --strip-trailing-cr a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c
+--- a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c
++++ b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c
+@@ -1,4 +1,4 @@
+-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -Wall -Werror -verify
++// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-256 -Wall -Werror -verify
+
+ unsigned long long test_mm_cvttssd(unsigned long long __A) {
+ return _mm_cvttssd(__A); // expected-error {{call to undeclared function '_mm_cvttssd'}}
diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl
index 2f731c7a2e4da..aaf1f1a2db970 100644
--- a/third_party/llvm/workspace.bzl
+++ b/third_party/llvm/workspace.bzl
@@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive")

 def repo(name):
     """Imports LLVM."""
-    LLVM_COMMIT = "36adf8ecedb64047021265a1e1730773d3b3a9e8"
-    LLVM_SHA256 = "7baedfc21f67f64f054482cbe77cb3049cd4428187cd45799e10ff8eb03dc9f6"
+    LLVM_COMMIT = "f0b3287297aeeddcf030e3c1b08d05a69ad465aa"
+    LLVM_SHA256 =
"3bc65e7a760a389f5ace1146cb2ffde724a272e97e71c8b8509149e827df6c83" tf_http_archive( name = name, diff --git a/third_party/shardy/temporary.patch b/third_party/shardy/temporary.patch index 101b6b56e4ca4..5ddce7071d49f 100644 --- a/third_party/shardy/temporary.patch +++ b/third_party/shardy/temporary.patch @@ -1,496 +1,48 @@ -diff --git a/shardy/dialect/sdy/ir/data_flow_utils.cc b/shardy/dialect/sdy/ir/data_flow_utils.cc -index e2cea1f..e53adf5 100644 ---- a/shardy/dialect/sdy/ir/data_flow_utils.cc -+++ b/shardy/dialect/sdy/ir/data_flow_utils.cc -@@ -31,6 +31,11 @@ namespace sdy { - - namespace { - -+bool isDataFlowOp(Operation* op) { -+ return isa(op); -+} -+ - // Gets the owning op if it is a shardable data flow op interface op. - ShardableDataFlowOpInterface getOwningShardableDataFlowOp(Value value) { - return dyn_cast(getOwningOp(value)); -@@ -71,12 +76,6 @@ Value getDataFlowEdgeOwner(OpOperand& source) { - } - - } // namespace -- --bool isDataFlowOp(Operation* op) { -- return isa(op); --} -- - ResultRange getDataFlowEdgeResultOwners(Operation* op) { - if (auto shardableDataFlowOp = dyn_cast(op)) { - return shardableDataFlowOp.getOpResultEdgeOwners(); -@@ -88,19 +87,6 @@ ResultRange getDataFlowEdgeResultOwners(Operation* op) { - return ResultRange(nullptr, 0); - } - --ArrayRef getDataFlowEdgeBlockArgumentOwners(Operation* op) { -- if (auto shardableDataFlowOp = dyn_cast(op)) { -- return shardableDataFlowOp.getBlockArgumentEdgeOwners(); -- } -- return {}; --} -- --void setBlockArgumentEdgeOwnerShardings( -- Operation* op, ArrayRef shardings) { -- cast(op).setBlockArgumentEdgeOwnerShardings( -- shardings); --} -- - DataFlowEdgeOp getDataFlowEdge(Value target) { - return DataFlowEdgeOp::getDataFlowEdgeUser(getDataFlowEdgeOwner(target)); - } -diff --git a/shardy/dialect/sdy/ir/data_flow_utils.h b/shardy/dialect/sdy/ir/data_flow_utils.h -index 99ffde8..992a0b8 100644 ---- a/shardy/dialect/sdy/ir/data_flow_utils.h -+++ b/shardy/dialect/sdy/ir/data_flow_utils.h -@@ -35,18 +35,10 @@ namespace sdy { - - // See `DataFlowEdgeOp` documentation for more information on data-flow edges. - --// Returns true if the `op` defines data flow edges, e.g. , it's a --// `ShardableDataFlowOpInterface`. --bool isDataFlowOp(Operation* op); -- - // If `op` has data-flow edges, returns their op result edge owners (e.g., all - // results of a while/case op), otherwise returns an empty range. - ResultRange getDataFlowEdgeResultOwners(Operation* op); - --// If `op` is a `ShardableDataFlowOpInterface` which can have block argument --// edge owners, returns the owners, otherwise returns an empty range. --ArrayRef getDataFlowEdgeBlockArgumentOwners(Operation* op); -- - // If `target` is a target of a data-flow edge, returns the corresponding - // `DataFlowEdgeOp`, otherwise returns `nullptr`. - DataFlowEdgeOp getDataFlowEdge(Value target); -@@ -62,11 +54,6 @@ SmallVector getDataFlowSources(DataFlowEdgeOp dataFlowEdge); - void forEachNonEdgeOwnerDataFlowTarget(DataFlowEdgeOp dataFlowEdge, - std::function fn); - --// Sets the block argument edge owner shardings if the `op` is a --// `ShardableDataFlowOpInterface`. 
--void setBlockArgumentEdgeOwnerShardings(Operation* op, -- ArrayRef shardings); -- - } // namespace sdy - } // namespace mlir - -diff --git a/shardy/dialect/sdy/ir/parsers.cc b/shardy/dialect/sdy/ir/parsers.cc -index c48bb2a..7748bc2 100644 ---- a/shardy/dialect/sdy/ir/parsers.cc -+++ b/shardy/dialect/sdy/ir/parsers.cc -@@ -199,7 +199,7 @@ FailureOr parseFactorSymbolIndex(AsmParser& parser, - ParseResult parseSymbolIndices(AsmParser& parser, StringRef factorsStr, - SmallVector& indices) { - while (!factorsStr.empty()) { -- // TODO(bartchr): Add SDY_ASSIGN_OR_RETURN_FAILURE macro for re-returning -+ // TODO(bartchr): Add ASSIGN_OR_RETURN_FAILURE macro for re-returning - // failures. Or check if there already is one in MLIR. - FailureOr index = parseFactorSymbolIndex(parser, factorsStr); - if (failed(index)) { -diff --git a/shardy/dialect/sdy/transforms/common/macros.h b/shardy/dialect/sdy/transforms/common/macros.h -index c69c1e2..c16502a 100644 ---- a/shardy/dialect/sdy/transforms/common/macros.h -+++ b/shardy/dialect/sdy/transforms/common/macros.h -@@ -17,9 +17,8 @@ limitations under the License. - #define SHARDY_DIALECT_SDY_TRANSFORMS_COMMON_MACROS_H_ - - // Macro to assign value from std::optional or return std::nullopt. --#define SDY_ASSIGN_OR_RETURN_IF_NULLOPT(lhs, expr) \ -- SDY_ASSIGN_OR_RETURN_IF_NULLOPT_IMPL(CONCAT_(_expr_result, __LINE__), lhs, \ -- expr) -+#define ASSIGN_OR_RETURN_IF_NULLOPT(lhs, expr) \ -+ ASSIGN_OR_RETURN_IF_NULLOPT_IMPL(CONCAT_(_expr_result, __LINE__), lhs, expr) - - // ================================================================= - // == Implementation details, do not rely on anything below here. == -@@ -28,11 +27,11 @@ limitations under the License. - #define CONCAT_INNER_(x, y) x##y - #define CONCAT_(x, y) CONCAT_INNER_(x, y) - --#define SDY_ASSIGN_OR_RETURN_IF_NULLOPT_IMPL(result, lhs, expr) \ -- auto result = expr; \ -- if (!result.has_value()) { \ -- return std::nullopt; \ -- } \ -+#define ASSIGN_OR_RETURN_IF_NULLOPT_IMPL(result, lhs, expr) \ -+ auto result = expr; \ -+ if (!result.has_value()) { \ -+ return std::nullopt; \ -+ } \ - lhs = std::move(result).value(); - - #endif // SHARDY_DIALECT_SDY_TRANSFORMS_COMMON_MACROS_H_ -diff --git a/shardy/dialect/sdy/transforms/export/sink_data_flow_edges.cc b/shardy/dialect/sdy/transforms/export/sink_data_flow_edges.cc -index 9b2f62a..ab4d281 100644 ---- a/shardy/dialect/sdy/transforms/export/sink_data_flow_edges.cc -+++ b/shardy/dialect/sdy/transforms/export/sink_data_flow_edges.cc -@@ -15,19 +15,20 @@ limitations under the License. - - #include - #include // IWYU pragma: keep -+#include - - #include "llvm/ADT/STLExtras.h" --#include "llvm/ADT/SmallVector.h" - #include "mlir/Dialect/Func/IR/FuncOps.h" -+#include "mlir/IR/MLIRContext.h" - #include "mlir/IR/Operation.h" - #include "mlir/IR/PatternMatch.h" - #include "mlir/IR/Value.h" --#include "mlir/IR/ValueRange.h" --#include "mlir/IR/Visitors.h" - #include "mlir/Pass/Pass.h" // IWYU pragma: keep -+#include "mlir/Rewrite/FrozenRewritePatternSet.h" - #include "mlir/Support/LLVM.h" -+#include "mlir/Support/LogicalResult.h" -+#include "mlir/Transforms/GreedyPatternRewriteDriver.h" - #include "shardy/dialect/sdy/ir/constants.h" --#include "shardy/dialect/sdy/ir/data_flow_utils.h" - #include "shardy/dialect/sdy/ir/dialect.h" - #include "shardy/dialect/sdy/ir/utils.h" - -@@ -39,85 +40,96 @@ namespace sdy { - - namespace { - --// Gets a vector of `TensorShardingAttr` for the given edge owner. --// --// Each value in `edgeOwners` is the owner of a data flow edge. 
If the data flow --// edge already has a sharding, we will copy the sharding. Otherwise, if one --// of the owners in `edgeOwners` has a sharding, we create a fully open sharding --// with the mesh name of the first such sharding for all the other values that --// don't have a sharding. --SmallVector getShardingsFromDataFlowEdges( -- ValueRange edgeOwners) { -- SmallVector shardings; -- shardings.reserve(edgeOwners.size()); -- -- StringRef meshName; -- for (Value edgeOwner : edgeOwners) { -- TensorShardingAttr sharding; -- if (DataFlowEdgeOp dataFlowEdgeOp = -- DataFlowEdgeOp::getDataFlowEdgeUser(edgeOwner)) { -- sharding = dataFlowEdgeOp.getShardingAttr(); -- if (sharding && meshName.empty()) { -- meshName = sharding.getMeshName(); -+// This pattern matches on a specific `DataFlowEdgeOp`, but will also sink any -+// other `DataFlowEdgeOp` whose input is defined by the same op. This way we can -+// build the `TensorShardingPerValueAttr` for the defining op once. -+class SinkDataFlowEdgesPattern : public OpRewritePattern { -+ public: -+ using OpRewritePattern::OpRewritePattern; -+ -+ private: -+ LogicalResult matchAndRewrite(DataFlowEdgeOp dataFlowEdgeOp, -+ PatternRewriter& rewriter) const override { -+ Operation* defOp = dataFlowEdgeOp.getInput().getDefiningOp(); -+ if (!defOp) { -+ // `dataFlowEdgeOp` takes a block argument, we ignore the sharding of -+ // `dataFlowEdgeOp` since a block argument can't have a sharding attached. -+ // TODO(tomnatan): we might need to revisit this for future use cases. -+ rewriter.replaceOp(dataFlowEdgeOp, dataFlowEdgeOp.getInput()); -+ return success(); -+ } -+ -+ SmallVector shardings(defOp->getNumResults()); -+ -+ // For each result of `defOp` that is used by a `DataFlowEdgeOp`: -+ // - If the `DataFlowEdgeOp` has a sharding, add it to `shardings`. -+ // - Replace the `DataFlowEdgeOp` with its input. -+ // -+ // In addition, stores the mesh name of first encountered sharding, as we -+ // need a mesh name to replace missing shardings with fully replicated -+ // shardings. Note that it's ok to pick an arbitrary mesh if there are -+ // multiple, as we are creating fully replicated shardings. -+ StringRef meshName; -+ for (auto [index, result] : llvm::enumerate(defOp->getResults())) { -+ // We can assume a `DataFlowEdgeOp` will be the only user of its input. -+ DataFlowEdgeOp dataFlowEdgeOp = -+ DataFlowEdgeOp::getDataFlowEdgeUser(result); -+ if (!dataFlowEdgeOp) { -+ continue; -+ } -+ if (TensorShardingAttr sharding = dataFlowEdgeOp.getShardingAttr()) { -+ shardings[index] = sharding; -+ if (meshName.empty()) { -+ meshName = sharding.getMeshName(); -+ } - } -+ rewriter.replaceOp(dataFlowEdgeOp, dataFlowEdgeOp.getInput()); - } -- shardings.push_back(sharding); -- } -- if (meshName.empty()) { -- return {}; -- } -- // There is at least one `DataFlowEdgeOp` with a sharding. -- // Replace all empty shardings with fully open shardings. -- // NOTE: this will replace the existing edgeOwner's sharding, if any, though -- // this shouldn't happen as as `sdy-add-data-flow-edges` would have copied it. -- for (auto [sharding, edgeOwner] : llvm::zip_equal(shardings, edgeOwners)) { -- if (!sharding) { -- sharding = TensorShardingAttr::getFullyOpen( -- edgeOwner.getContext(), getTensorRank(edgeOwner), meshName); -+ -+ if (!meshName.empty()) { -+ // There is at least one `DataFlowEdgeOp` with a sharding. -+ // Replace all empty shardings with fully open shardings. 
-+ for (auto [sharding, result] : -+ llvm::zip(shardings, defOp->getResults())) { -+ if (!sharding) { -+ sharding = getOrCreateSharding(result, meshName); -+ } -+ } -+ defOp->setAttr(kShardingAttr, TensorShardingPerValueAttr::get( -+ defOp->getContext(), shardings)); - } -+ -+ return success(); - } -- return shardings; --} -+}; - - struct SinkDataFlowEdgesPass - : public impl::SinkDataFlowEdgesPassBase { - using SinkDataFlowEdgesPassBase::SinkDataFlowEdgesPassBase; - -+ LogicalResult initialize(MLIRContext* context) final { -+ config.useTopDownTraversal = true; -+ config.enableRegionSimplification = -+ mlir::GreedySimplifyRegionLevel::Disabled; -+ config.maxIterations = 2; -+ -+ RewritePatternSet patternsInternal(context); -+ patternsInternal.add(context); -+ patterns = std::move(patternsInternal); -+ -+ return success(); -+ } -+ - void runOnOperation() final { -- func::FuncOp funcOp = getOperation(); -- IRRewriter rewriter(funcOp); -- // Copy the sharding from data flow edges to the data flow ops. -- funcOp.walk([&](Operation* op) { -- // Since we are doing the walk in preorder with a forward iterator, ops -- // are walked before their users and regions. Since `DataFlowEdgeOp` can -- // only appear inside the data flow op's region or as its user, we always -- // encounter the data flow op before their data flow edges. This means it -- // is safe to erase the `DataFlowEdgeOp` at this point. We need the skip -- // at the end because it's a condition to erase the op. See the -- // documentation for `Operation::walk` for more details. -- if (isa(op)) { -- DataFlowEdgeOp dataFlowEdgeOp = cast(op); -- rewriter.replaceOp(dataFlowEdgeOp, dataFlowEdgeOp.getInput()); -- return WalkResult::skip(); -- } -- if (!isDataFlowOp(op)) { -- return WalkResult::advance(); -- } -- if (SmallVector blockArgShardings = -- getShardingsFromDataFlowEdges( -- getDataFlowEdgeBlockArgumentOwners(op)); -- !blockArgShardings.empty()) { -- setBlockArgumentEdgeOwnerShardings(op, blockArgShardings); -- } -- if (SmallVector resultShardings = -- getShardingsFromDataFlowEdges(getDataFlowEdgeResultOwners(op)); -- !resultShardings.empty()) { -- op->setAttr(kShardingAttr, TensorShardingPerValueAttr::get( -- op->getContext(), resultShardings)); -- } -- return WalkResult::advance(); -- }); -+ if (failed( -+ applyPatternsAndFoldGreedily(getOperation(), patterns, config))) { -+ signalPassFailure(); -+ } - } -+ -+ private: -+ FrozenRewritePatternSet patterns; -+ GreedyRewriteConfig config; - }; - - } // namespace -diff --git a/shardy/dialect/sdy/transforms/export/test/sink_data_flow_edges.mlir b/shardy/dialect/sdy/transforms/export/test/sink_data_flow_edges.mlir -index eee582c..194b28b 100644 ---- a/shardy/dialect/sdy/transforms/export/test/sink_data_flow_edges.mlir -+++ b/shardy/dialect/sdy/transforms/export/test/sink_data_flow_edges.mlir -@@ -103,7 +103,7 @@ func.func @all_edges_have_sharding(%arg0: tensor<32x96xf32>, %arg1: tensor<32x96 - func.func @missing_edge(%arg0: tensor<32x96xf32>, %arg1: tensor<32x96xf32>) - -> (tensor<32x96xf32>, tensor<32x96xf32>) { - // CHECK-NEXT: %[[OPT_BARRIER:.*]]:2 = stablehlo.optimization_barrier -- // CHECK-SAME: {sdy.sharding = #sdy.sharding_per_value<[<@mesh, [{?}, {?}]>, <@mesh, [{"a", ?}, {}]>]>} -+ // CHECK-SAME: {sdy.sharding = #sdy.sharding_per_value<[<@mesh, [{"b"}, {?}]>, <@mesh, [{"a", ?}, {}]>]>} - // CHECK-NEXT: return %[[OPT_BARRIER]]#0, %[[OPT_BARRIER]]#1 - %0:2 = stablehlo.optimization_barrier - {sdy.sharding = #sdy.sharding_per_value<[<@mesh, [{"b"}, {?}]>, <@mesh, [{?}, {}]>]>} -@@ 
-134,7 +134,7 @@ func.func @sharding_overrided(%arg0: tensor<32x96xf32>, %arg1: tensor<32x96xf32> - func.func @edge_missing_sharding(%arg0: tensor<32x96xf32>, %arg1: tensor<32x96xf32>) - -> (tensor<32x96xf32>, tensor<32x96xf32>) { - // CHECK-NEXT: %[[OPT_BARRIER:.*]]:2 = stablehlo.optimization_barrier -- // CHECK-SAME: {sdy.sharding = #sdy.sharding_per_value<[<@mesh, [{?}, {?}]>, <@mesh, [{"a", ?}, {}]>]>} -+ // CHECK-SAME: {sdy.sharding = #sdy.sharding_per_value<[<@mesh, [{"b"}, {?}]>, <@mesh, [{"a", ?}, {}]>]>} - // CHECK-NEXT: return %[[OPT_BARRIER]]#0, %[[OPT_BARRIER]]#1 - %0:2 = stablehlo.optimization_barrier - {sdy.sharding = #sdy.sharding_per_value<[<@mesh, [{"b"}, {?}]>, <@mesh, [{?}, {}]>]>} -diff --git a/shardy/dialect/sdy/transforms/import/add_data_flow_edges.cc b/shardy/dialect/sdy/transforms/import/add_data_flow_edges.cc -index ee9ee1d..4f5ac97 100644 ---- a/shardy/dialect/sdy/transforms/import/add_data_flow_edges.cc -+++ b/shardy/dialect/sdy/transforms/import/add_data_flow_edges.cc -@@ -63,7 +63,12 @@ struct AddDataFlowEdgesPass - funcOp.walk([&](Operation* op) { - // Add the data flow edges for result owners and block argument owners. - addDataFlowEdges(getDataFlowEdgeResultOwners(op), rewriter); -- addDataFlowEdges(getDataFlowEdgeBlockArgumentOwners(op), rewriter); -+ if (auto shardableDataFlowOpInterface = -+ dyn_cast(op)) { -+ addDataFlowEdges( -+ shardableDataFlowOpInterface.getBlockArgumentEdgeOwners(), -+ rewriter); -+ } - }); - } - }; -diff --git a/shardy/dialect/sdy/transforms/propagation/basic_factor_propagation.cc b/shardy/dialect/sdy/transforms/propagation/basic_factor_propagation.cc -index bb05f11..812dc73 100644 ---- a/shardy/dialect/sdy/transforms/propagation/basic_factor_propagation.cc -+++ b/shardy/dialect/sdy/transforms/propagation/basic_factor_propagation.cc -@@ -47,8 +47,8 @@ std::optional getPrefixWithoutOverlap( - AxisRefAttr axisRef, ArrayRef otherAxisRefs) { - AxisRefAttr result = axisRef; - for (AxisRefAttr otherAxisRef : otherAxisRefs) { -- SDY_ASSIGN_OR_RETURN_IF_NULLOPT( -- result, result.getPrefixWithoutOverlap(otherAxisRef)); -+ ASSIGN_OR_RETURN_IF_NULLOPT(result, -+ result.getPrefixWithoutOverlap(otherAxisRef)); - } - return result; - } -@@ -62,9 +62,9 @@ BasicFactorPropagation::compatiblePrefixNoConflictsAcrossFactors( - AxisRefAttr result = axisRef; - for (const auto& [otherFactorIndex, shardings] : factorIndexToSharding) { - if (otherFactorIndex != factorIndex) { -- SDY_ASSIGN_OR_RETURN_IF_NULLOPT( -+ ASSIGN_OR_RETURN_IF_NULLOPT( - result, getPrefixWithoutOverlap(result, shardings.overflowAxes)); -- SDY_ASSIGN_OR_RETURN_IF_NULLOPT( -+ ASSIGN_OR_RETURN_IF_NULLOPT( - result, getPrefixWithoutOverlap(result, shardings.axisRefs)); - } - } -@@ -78,8 +78,8 @@ BasicFactorPropagation::compatiblePrefixNoConflictsWithinFactor( - int64_t factorSize) const { - AxisRefAttr result = axisRef; - -- SDY_ASSIGN_OR_RETURN_IF_NULLOPT( -- result, getPrefixWithoutOverlap(result, replicatedAxes)); -+ ASSIGN_OR_RETURN_IF_NULLOPT(result, -+ getPrefixWithoutOverlap(result, replicatedAxes)); - - ArrayRef factorAxes = factorSharding.axisRefs; - if (llvm::any_of(factorAxes, [&](AxisRefAttr shardingAxis) { -@@ -323,9 +323,9 @@ std::optional BasicFactorPropagation::compatiblePrefix( - const FactorIndexToSharding& factorIndexToSharding = - tensorFactorSharding.factorIndexToSharding; - -- SDY_ASSIGN_OR_RETURN_IF_NULLOPT( -- AxisRefAttr result, compatiblePrefixNoConflictsAcrossFactors( -- axisRef, factorIndexToSharding, factorIndex)); -+ ASSIGN_OR_RETURN_IF_NULLOPT(AxisRefAttr 
result, -+ compatiblePrefixNoConflictsAcrossFactors( -+ axisRef, factorIndexToSharding, factorIndex)); - - auto factorShardingIt = factorIndexToSharding.find(factorIndex); - if (factorShardingIt == factorIndexToSharding.end()) { -@@ -351,7 +351,7 @@ std::optional BasicFactorPropagation::compatiblePrefix( - for (const TensorFactorShardings& tensorFactorSharding : - llvm::concat(projection.getOperands(), - projection.getResults())) { -- SDY_ASSIGN_OR_RETURN_IF_NULLOPT( -+ ASSIGN_OR_RETURN_IF_NULLOPT( - result, compatiblePrefix(result, tensorFactorSharding, factorIndex, - shardedSize, factorSize)); - } +diff --git a/third_party/llvm/generated.patch b/third_party/llvm/generated.patch +index 509398d..ed6047c 100644 +--- a/third_party/llvm/generated.patch ++++ b/third_party/llvm/generated.patch +@@ -1 +1,28 @@ + Auto generated patch. Do not edit or delete it, even if empty. ++diff -ruN --strip-trailing-cr a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c ++--- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c +++++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c ++@@ -1,4 +1,4 @@ ++-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -Wall -Werror -verify +++// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-512 -Wall -Werror -verify ++ ++ #include ++ #include ++diff -ruN --strip-trailing-cr a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c ++--- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c +++++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c ++@@ -1,4 +1,4 @@ ++-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -Wall -Werror -verify +++// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 -Wall -Werror -verify ++ ++ #include ++ #include ++diff -ruN --strip-trailing-cr a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c ++--- a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c +++++ b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c ++@@ -1,4 +1,4 @@ ++-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -Wall -Werror -verify +++// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-256 -Wall -Werror -verify ++ ++ unsigned long long test_mm_cvttssd(unsigned long long __A) { ++ return _mm_cvttssd(__A); // expected-error {{call to undeclared function '_mm_cvttssd'}} diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl -index aab5a85..2f731c7 100644 +index 2f731c7..aaf1f1a 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" -- LLVM_COMMIT = "3cd01371e007b2a8fe32e5d8ce1154057e5e1c2e" -- LLVM_SHA256 = "3d1ee3e896689b5ff2e8cc547e554c59bc70d1101ede9f25be9ca53d9dc409b9" -+ LLVM_COMMIT = "36adf8ecedb64047021265a1e1730773d3b3a9e8" -+ LLVM_SHA256 = 
"7baedfc21f67f64f054482cbe77cb3049cd4428187cd45799e10ff8eb03dc9f6" +- LLVM_COMMIT = "36adf8ecedb64047021265a1e1730773d3b3a9e8" +- LLVM_SHA256 = "7baedfc21f67f64f054482cbe77cb3049cd4428187cd45799e10ff8eb03dc9f6" ++ LLVM_COMMIT = "f0b3287297aeeddcf030e3c1b08d05a69ad465aa" ++ LLVM_SHA256 = "3bc65e7a760a389f5ace1146cb2ffde724a272e97e71c8b8509149e827df6c83" tf_http_archive( name = name, -diff --git a/third_party/stablehlo/temporary.patch b/third_party/stablehlo/temporary.patch -index a506670..0f83544 100755 ---- a/third_party/stablehlo/temporary.patch -+++ b/third_party/stablehlo/temporary.patch -@@ -1,29 +1,33 @@ - diff --ruN a/stablehlo/stablehlo/conversions/tosa/tests/unary.mlir b/stablehlo/stablehlo/conversions/tosa/tests/unary.mlir - --- stablehlo/stablehlo/conversions/tosa/tests/unary.mlir - +++ stablehlo/stablehlo/conversions/tosa/tests/unary.mlir --@@ -119,13 +119,15 @@ -- return %0 : tensor<10xf32> -- } -+@@ -121,8 +121,8 @@ - ---// CHECK-LABEL: @transpose ---func.func @transpose(%arg0: tensor<1x2x3xf32>) -> tensor<3x2x1xf32> { -+ // CHECK-LABEL: @transpose -+ func.func @transpose(%arg0: tensor<1x2x3xf32>) -> tensor<3x2x1xf32> { - - // CHECK-DAG: %[[VAR0:.*]] = "tosa.const"() <{value = dense<[2, 1, 0]> : tensor<3xi64>}> : () -> tensor<3xi64> - - // CHECK-DAG: %[[VAR1:.*]] = tosa.transpose %arg0, %[[VAR0]] --- %0 = "stablehlo.transpose"(%arg0) {permutation = array} : (tensor<1x2x3xf32>) -> tensor<3x2x1xf32> --- return %0 : tensor<3x2x1xf32> ---} --+// TODO: https://github.com/llvm/llvm-project/pull/108133 breaks the test, --+// need to investigate this. --+// disableCHECK-LABEL: @transpose --+// func.func @transpose(%arg0: tensor<1x2x3xf32>) -> tensor<3x2x1xf32> { --+ // disableCHECK-DAG: %[[VAR0:.*]] = "tosa.const"() <{value = dense<[2, 1, 0]> : tensor<3xi64>}> : () -> tensor<3xi64> --+ // disableCHECK-DAG: %[[VAR1:.*]] = tosa.transpose %arg0, %[[VAR0]] --+ // %0 = "stablehlo.transpose"(%arg0) {permutation = array} : (tensor<1x2x3xf32>) -> tensor<3x2x1xf32> --+ // return %0 : tensor<3x2x1xf32> --+// } -++ // CHECK: %[[VAR0:.*]] = "tosa.const"() <{value = dense<[2, 1, 0]> : tensor<3xi32>}> : () -> tensor<3xi32> -++ // CHECK: %[[VAR1:.*]] = tosa.transpose %arg0, %[[VAR0]] -+ %0 = "stablehlo.transpose"(%arg0) {permutation = array} : (tensor<1x2x3xf32>) -> tensor<3x2x1xf32> -+ return %0 : tensor<3x2x1xf32> -+ } -+diff --ruN a/stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.cpp b/stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.cpp -+--- stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.cpp -++++ stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.cpp -+@@ -451,9 +451,10 @@ - -- // CHECK-LABEL: @while -- func.func @while(%arg0: tensor) -> tensor { -+ auto perms = op.getPermutation(); -+ auto type = RankedTensorType::get({static_cast(perms.size())}, -+- rewriter.getI64Type()); -++ rewriter.getI32Type()); -++ std::vector perms_int32(perms.begin(), perms.end()); -+ auto constOp = rewriter.create( -+- op->getLoc(), type, DenseIntElementsAttr::get(type, perms)); -++ op->getLoc(), type, DenseIntElementsAttr::get(type, perms_int32)); -+ rewriter.replaceOpWithNewOp(op, op.getType(), -+ op.getOperand(), constOp); -+ return success(); - diff --ruN a/stablehlo/stablehlo/dialect/Version.cpp b/stablehlo/stablehlo/dialect/Version.cpp - --- stablehlo/stablehlo/dialect/Version.cpp - +++ stablehlo/stablehlo/dialect/Version.cpp diff --git a/third_party/shardy/workspace.bzl b/third_party/shardy/workspace.bzl index 
8b5f84dffb563..e80cf84d1a1e8 100644
--- a/third_party/shardy/workspace.bzl
+++ b/third_party/shardy/workspace.bzl
@@ -3,8 +3,8 @@ load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls")

 def repo():
-    SHARDY_COMMIT = "17543e47ff8adb535322df2a9ad9e3171c9741c9"
-    SHARDY_SHA256 = "13ee573236d481bc7996a01c44ece6e70ad17a3b643e925155c830f4fbc09094"
+    SHARDY_COMMIT = "1142aa484b8e11440f609859a6a55edfb733f869"
+    SHARDY_SHA256 = "ecb2037f2e098488d2061f988b6088d8834460e61fe73ffc32ea5e9c6692b96c"

     tf_http_archive(
         name = "shardy",
diff --git a/third_party/tsl/third_party/llvm/generated.patch b/third_party/tsl/third_party/llvm/generated.patch
index 509398da979e8..ed6047cd2b8f2 100644
--- a/third_party/tsl/third_party/llvm/generated.patch
+++ b/third_party/tsl/third_party/llvm/generated.patch
@@ -1 +1,28 @@
 Auto generated patch. Do not edit or delete it, even if empty.
+diff -ruN --strip-trailing-cr a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c
+--- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c
++++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c
+@@ -1,4 +1,4 @@
+-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -Wall -Werror -verify
++// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-512 -Wall -Werror -verify
+
+ #include
+ #include
+diff -ruN --strip-trailing-cr a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c
+--- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c
++++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c
+@@ -1,4 +1,4 @@
+-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -Wall -Werror -verify
++// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 -Wall -Werror -verify
+
+ #include
+ #include
+diff -ruN --strip-trailing-cr a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c
+--- a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c
++++ b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c
+@@ -1,4 +1,4 @@
+-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -Wall -Werror -verify
++// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-256 -Wall -Werror -verify
+
+ unsigned long long test_mm_cvttssd(unsigned long long __A) {
+ return _mm_cvttssd(__A); // expected-error {{call to undeclared function '_mm_cvttssd'}}
diff --git a/third_party/tsl/third_party/llvm/workspace.bzl b/third_party/tsl/third_party/llvm/workspace.bzl
index 2f731c7a2e4da..aaf1f1a2db970 100644
--- a/third_party/tsl/third_party/llvm/workspace.bzl
+++ b/third_party/tsl/third_party/llvm/workspace.bzl
@@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive")

 def repo(name):
     """Imports LLVM."""
-    LLVM_COMMIT = "36adf8ecedb64047021265a1e1730773d3b3a9e8"
-    LLVM_SHA256 = "7baedfc21f67f64f054482cbe77cb3049cd4428187cd45799e10ff8eb03dc9f6"
+    LLVM_COMMIT = "f0b3287297aeeddcf030e3c1b08d05a69ad465aa"
+    LLVM_SHA256 = "3bc65e7a760a389f5ace1146cb2ffde724a272e97e71c8b8509149e827df6c83"

     tf_http_archive(
         name = name,