From b380334d1a2e5c54240f1620b34de2b1239f9beb Mon Sep 17 00:00:00 2001 From: xla authors Date: Thu, 3 Oct 2024 03:38:32 -0700 Subject: [PATCH] Integrate LLVM at llvm/llvm-project@00128a20eec2 Updates LLVM usage to match [00128a20eec2](https://github.com/llvm/llvm-project/commit/00128a20eec2) PiperOrigin-RevId: 681804881 --- third_party/llvm/generated.patch | 935 +++++++- third_party/llvm/workspace.bzl | 4 +- third_party/shardy/temporary.patch | 1927 ++++++++--------- .../tsl/third_party/llvm/generated.patch | 935 +++++++- .../tsl/third_party/llvm/workspace.bzl | 4 +- 5 files changed, 2673 insertions(+), 1132 deletions(-) diff --git a/third_party/llvm/generated.patch b/third_party/llvm/generated.patch index 1bea5353eeed4..155d3f2cc1ec4 100644 --- a/third_party/llvm/generated.patch +++ b/third_party/llvm/generated.patch @@ -1,78 +1,901 @@ Auto generated patch. Do not edit or delete it, even if empty. -diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch1/include/toy/Lexer.h b/mlir/examples/toy/Ch1/include/toy/Lexer.h ---- a/mlir/examples/toy/Ch1/include/toy/Lexer.h -+++ b/mlir/examples/toy/Ch1/include/toy/Lexer.h -@@ -15,6 +15,7 @@ +diff -ruN --strip-trailing-cr a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp +--- a/clang/lib/CodeGen/CGDeclCXX.cpp ++++ b/clang/lib/CodeGen/CGDeclCXX.cpp +@@ -640,13 +640,13 @@ + addUsedGlobal(COMDATKey); + } - #include "llvm/ADT/StringRef.h" +- // If comdats are in use and supported, place the initializer function into +- // the comdat group of the global. In the MS ABI, initializers are mangled +- // and have their own comdat, so we don't include them in the group for +- // consistency with MSVC. ++ // If we used a COMDAT key for the global ctor, the init function can be ++ // discarded if the global ctor entry is discarded. ++ // FIXME: Do we need to restrict this to ELF and Wasm? + llvm::Comdat *C = Addr->getComdat(); +- if (COMDATKey && C && getTriple().supportsCOMDAT() && +- !getTarget().getCXXABI().isMicrosoft()) { ++ if (COMDATKey && C && ++ (getTarget().getTriple().isOSBinFormatELF() || ++ getTarget().getTriple().isOSBinFormatWasm())) { + Fn->setComdat(C); + } + } else { +diff -ruN --strip-trailing-cr a/libcxx/docs/Status/Cxx23Issues.csv b/libcxx/docs/Status/Cxx23Issues.csv +--- a/libcxx/docs/Status/Cxx23Issues.csv ++++ b/libcxx/docs/Status/Cxx23Issues.csv +@@ -296,7 +296,7 @@ + "`LWG3862 `__","``basic_const_iterator``'s ``common_type`` specialization is underconstrained","2023-02 (Issaquah)","","","" + "`LWG3865 `__","Sorting a range of ``pairs``","2023-02 (Issaquah)","|Complete|","17.0","" + "`LWG3869 `__","Deprecate ``std::errc`` constants related to UNIX STREAMS","2023-02 (Issaquah)","|Complete|","19.0","" +-"`LWG3870 `__","Remove ``voidify``","2023-02 (Issaquah)","|Complete|","20.0","" ++"`LWG3870 `__","Remove ``voidify``","2023-02 (Issaquah)","","","" + "`LWG3871 `__","Adjust note about ``terminate``","2023-02 (Issaquah)","","","" + "`LWG3872 `__","``basic_const_iterator`` should have custom ``iter_move``","2023-02 (Issaquah)","","","" + "`LWG3875 `__","``std::ranges::repeat_view::iterator`` may be ill-formed","2023-02 (Issaquah)","|Complete|","17.0","" +diff -ruN --strip-trailing-cr a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt +--- a/libcxx/include/CMakeLists.txt ++++ b/libcxx/include/CMakeLists.txt +@@ -560,6 +560,7 @@ + __memory/unique_temporary_buffer.h + __memory/uses_allocator.h + __memory/uses_allocator_construction.h ++ __memory/voidify.h + __memory_resource/memory_resource.h + __memory_resource/monotonic_buffer_resource.h + __memory_resource/polymorphic_allocator.h +diff -ruN --strip-trailing-cr a/libcxx/include/__memory/construct_at.h b/libcxx/include/__memory/construct_at.h +--- a/libcxx/include/__memory/construct_at.h ++++ b/libcxx/include/__memory/construct_at.h +@@ -14,6 +14,7 @@ + #include <__config> + #include <__iterator/access.h> + #include <__memory/addressof.h> ++#include <__memory/voidify.h> + #include <__type_traits/enable_if.h> + #include <__type_traits/is_array.h> + #include <__utility/declval.h> +@@ -37,7 +38,7 @@ + template ()) _Tp(std::declval<_Args>()...))> + _LIBCPP_HIDE_FROM_ABI constexpr _Tp* construct_at(_Tp* __location, _Args&&... __args) { + _LIBCPP_ASSERT_NON_NULL(__location != nullptr, "null pointer given to construct_at"); +- return ::new (static_cast(__location)) _Tp(std::forward<_Args>(__args)...); ++ return ::new (std::__voidify(*__location)) _Tp(std::forward<_Args>(__args)...); + } -+#include - #include - #include + #endif +@@ -48,7 +49,7 @@ + return std::construct_at(__location, std::forward<_Args>(__args)...); + #else + return _LIBCPP_ASSERT_NON_NULL(__location != nullptr, "null pointer given to construct_at"), +- ::new (static_cast(__location)) _Tp(std::forward<_Args>(__args)...); ++ ::new (std::__voidify(*__location)) _Tp(std::forward<_Args>(__args)...); + #endif + } -diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch2/include/toy/Lexer.h b/mlir/examples/toy/Ch2/include/toy/Lexer.h ---- a/mlir/examples/toy/Ch2/include/toy/Lexer.h -+++ b/mlir/examples/toy/Ch2/include/toy/Lexer.h -@@ -15,6 +15,7 @@ +diff -ruN --strip-trailing-cr a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h +--- a/libcxx/include/__memory/shared_ptr.h ++++ b/libcxx/include/__memory/shared_ptr.h +@@ -248,35 +248,33 @@ - #include "llvm/ADT/StringRef.h" + template + struct __shared_ptr_emplace : __shared_weak_count { +- using __value_type = __remove_cv_t<_Tp>; +- + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI explicit __shared_ptr_emplace(_Alloc __a, _Args&&...) : __storage_(std::move(__a)) { + static_assert( + sizeof...(_Args) == 0, "No argument should be provided to the control block when using _for_overwrite"); +- ::new (static_cast(__get_elem())) __value_type; ++ ::new ((void*)__get_elem()) _Tp; + } -+#include - #include - #include + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI explicit __shared_ptr_emplace(_Alloc __a, _Args&&... __args) : __storage_(std::move(__a)) { +- using _TpAlloc = typename __allocator_traits_rebind<_Alloc, __value_type>::type; ++ using _TpAlloc = typename __allocator_traits_rebind<_Alloc, __remove_cv_t<_Tp> >::type; + _TpAlloc __tmp(*__get_alloc()); + allocator_traits<_TpAlloc>::construct(__tmp, __get_elem(), std::forward<_Args>(__args)...); + } -diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch3/include/toy/Lexer.h b/mlir/examples/toy/Ch3/include/toy/Lexer.h ---- a/mlir/examples/toy/Ch3/include/toy/Lexer.h -+++ b/mlir/examples/toy/Ch3/include/toy/Lexer.h -@@ -15,6 +15,7 @@ + _LIBCPP_HIDE_FROM_ABI _Alloc* __get_alloc() _NOEXCEPT { return __storage_.__get_alloc(); } - #include "llvm/ADT/StringRef.h" +- _LIBCPP_HIDE_FROM_ABI __value_type* __get_elem() _NOEXCEPT { return __storage_.__get_elem(); } ++ _LIBCPP_HIDE_FROM_ABI _Tp* __get_elem() _NOEXCEPT { return __storage_.__get_elem(); } -+#include - #include - #include + private: + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI void __on_zero_shared_impl() _NOEXCEPT { +- __get_elem()->~__value_type(); ++ __get_elem()->~_Tp(); + } -diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch4/include/toy/Lexer.h b/mlir/examples/toy/Ch4/include/toy/Lexer.h ---- a/mlir/examples/toy/Ch4/include/toy/Lexer.h -+++ b/mlir/examples/toy/Ch4/include/toy/Lexer.h -@@ -15,6 +15,7 @@ + template (__buffer_)->__alloc_); + } -+#include - #include - #include +- _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_CFI __value_type* __get_elem() _NOEXCEPT { ++ _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_CFI _Tp* __get_elem() _NOEXCEPT { + return std::addressof(reinterpret_cast<_Data*>(__buffer_)->__elem_); + } + }; +diff -ruN --strip-trailing-cr a/libcxx/include/__memory/uninitialized_algorithms.h b/libcxx/include/__memory/uninitialized_algorithms.h +--- a/libcxx/include/__memory/uninitialized_algorithms.h ++++ b/libcxx/include/__memory/uninitialized_algorithms.h +@@ -21,6 +21,7 @@ + #include <__memory/allocator_traits.h> + #include <__memory/construct_at.h> + #include <__memory/pointer_traits.h> ++#include <__memory/voidify.h> + #include <__type_traits/enable_if.h> + #include <__type_traits/extent.h> + #include <__type_traits/is_array.h> +@@ -63,7 +64,7 @@ + try { + #endif + for (; __ifirst != __ilast && !__stop_copying(__idx); ++__ifirst, (void)++__idx) +- ::new (static_cast(std::addressof(*__idx))) _ValueType(*__ifirst); ++ ::new (std::__voidify(*__idx)) _ValueType(*__ifirst); + #ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__ofirst, __idx); +@@ -93,7 +94,7 @@ + try { + #endif + for (; __n > 0 && !__stop_copying(__idx); ++__ifirst, (void)++__idx, (void)--__n) +- ::new (static_cast(std::addressof(*__idx))) _ValueType(*__ifirst); ++ ::new (std::__voidify(*__idx)) _ValueType(*__ifirst); + #ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__ofirst, __idx); +@@ -123,7 +124,7 @@ + try { + #endif + for (; __idx != __last; ++__idx) +- ::new (static_cast(std::addressof(*__idx))) _ValueType(__x); ++ ::new (std::__voidify(*__idx)) _ValueType(__x); + #ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__first, __idx); +@@ -151,7 +152,7 @@ + try { + #endif + for (; __n > 0; ++__idx, (void)--__n) +- ::new (static_cast(std::addressof(*__idx))) _ValueType(__x); ++ ::new (std::__voidify(*__idx)) _ValueType(__x); + #ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__first, __idx); +@@ -181,7 +182,7 @@ + try { + # endif + for (; __idx != __last; ++__idx) +- ::new (static_cast(std::addressof(*__idx))) _ValueType; ++ ::new (std::__voidify(*__idx)) _ValueType; + # ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__first, __idx); +@@ -207,7 +208,7 @@ + try { + # endif + for (; __n > 0; ++__idx, (void)--__n) +- ::new (static_cast(std::addressof(*__idx))) _ValueType; ++ ::new (std::__voidify(*__idx)) _ValueType; + # ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__first, __idx); +@@ -234,7 +235,7 @@ + try { + # endif + for (; __idx != __last; ++__idx) +- ::new (static_cast(std::addressof(*__idx))) _ValueType(); ++ ::new (std::__voidify(*__idx)) _ValueType(); + # ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__first, __idx); +@@ -260,7 +261,7 @@ + try { + # endif + for (; __n > 0; ++__idx, (void)--__n) +- ::new (static_cast(std::addressof(*__idx))) _ValueType(); ++ ::new (std::__voidify(*__idx)) _ValueType(); + # ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__first, __idx); +@@ -296,7 +297,7 @@ + try { + # endif + for (; __ifirst != __ilast && !__stop_moving(__idx); ++__idx, (void)++__ifirst) { +- ::new (static_cast(std::addressof(*__idx))) _ValueType(__iter_move(__ifirst)); ++ ::new (std::__voidify(*__idx)) _ValueType(__iter_move(__ifirst)); + } + # ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { +@@ -334,7 +335,7 @@ + try { + # endif + for (; __n > 0 && !__stop_moving(__idx); ++__idx, (void)++__ifirst, --__n) +- ::new (static_cast(std::addressof(*__idx))) _ValueType(__iter_move(__ifirst)); ++ ::new (std::__voidify(*__idx)) _ValueType(__iter_move(__ifirst)); + # ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__ofirst, __idx); +diff -ruN --strip-trailing-cr a/libcxx/include/__memory/voidify.h b/libcxx/include/__memory/voidify.h +--- a/libcxx/include/__memory/voidify.h ++++ b/libcxx/include/__memory/voidify.h +@@ -0,0 +1,30 @@ ++// -*- C++ -*- ++//===----------------------------------------------------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef _LIBCPP___MEMORY_VOIDIFY_H ++#define _LIBCPP___MEMORY_VOIDIFY_H ++ ++#include <__config> ++#include <__memory/addressof.h> ++ ++#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) ++# pragma GCC system_header ++#endif ++ ++_LIBCPP_BEGIN_NAMESPACE_STD ++ ++template ++_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void* __voidify(_Tp& __from) { ++ // Cast away cv-qualifiers to allow modifying elements of a range through const iterators. ++ return const_cast(static_cast(std::addressof(__from))); ++} ++ ++_LIBCPP_END_NAMESPACE_STD ++ ++#endif // _LIBCPP___MEMORY_VOIDIFY_H +diff -ruN --strip-trailing-cr a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap +--- a/libcxx/include/module.modulemap ++++ b/libcxx/include/module.modulemap +@@ -1528,6 +1528,7 @@ + } + module uses_allocator { header "__memory/uses_allocator.h" } + module uses_allocator_construction { header "__memory/uses_allocator_construction.h" } ++ module voidify { header "__memory/voidify.h" } -diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch5/include/toy/Lexer.h b/mlir/examples/toy/Ch5/include/toy/Lexer.h ---- a/mlir/examples/toy/Ch5/include/toy/Lexer.h -+++ b/mlir/examples/toy/Ch5/include/toy/Lexer.h -@@ -15,6 +15,7 @@ + header "memory" + export * +diff -ruN --strip-trailing-cr a/libcxx/include/optional b/libcxx/include/optional +--- a/libcxx/include/optional ++++ b/libcxx/include/optional +@@ -287,7 +287,7 @@ + static_assert(is_object_v, "instantiation of optional with a non-object type is undefined behavior"); + union { + char __null_state_; +- remove_cv_t __val_; ++ value_type __val_; + }; + bool __engaged_; - #include "llvm/ADT/StringRef.h" +@@ -323,7 +323,7 @@ + static_assert(is_object_v, "instantiation of optional with a non-object type is undefined behavior"); + union { + char __null_state_; +- remove_cv_t __val_; ++ value_type __val_; + }; + bool __engaged_; -+#include - #include - #include +@@ -377,7 +377,7 @@ + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __assign_from(_That&& __opt) { + if (this->__engaged_ == __opt.has_value()) { + if (this->__engaged_) +- static_cast<_Tp&>(this->__val_) = std::forward<_That>(__opt).__get(); ++ this->__val_ = std::forward<_That>(__opt).__get(); + } else { + if (this->__engaged_) + this->reset(); +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/construct_at.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/construct_at.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/construct_at.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/construct_at.pass.cpp +@@ -80,6 +80,21 @@ + a.deallocate(p, 2); + } -diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch6/include/toy/Lexer.h b/mlir/examples/toy/Ch6/include/toy/Lexer.h ---- a/mlir/examples/toy/Ch6/include/toy/Lexer.h -+++ b/mlir/examples/toy/Ch6/include/toy/Lexer.h -@@ -15,6 +15,7 @@ ++ { ++ std::allocator a; ++ Counted const* p = a.allocate(2); ++ int count = 0; ++ std::construct_at(p, count); ++ assert(count == 1); ++ std::construct_at(p+1, count); ++ assert(count == 2); ++ (p+1)->~Counted(); ++ assert(count == 1); ++ p->~Counted(); ++ assert(count == 0); ++ a.deallocate(const_cast(p), 2); ++ } ++ + return true; + } - #include "llvm/ADT/StringRef.h" +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/ranges_construct_at.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/ranges_construct_at.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/ranges_construct_at.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/ranges_construct_at.pass.cpp +@@ -99,6 +99,16 @@ + alloc.deallocate(out, 2); + } -+#include - #include - #include ++ // Works with const pointers. ++ { ++ int x = 1; ++ const int* ptr = &x; ++ ++ const int* result = std::ranges::construct_at(ptr, 42); ++ assert(result == ptr); ++ assert(x == 42); ++ } ++ + return true; + } -diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch7/include/toy/Lexer.h b/mlir/examples/toy/Ch7/include/toy/Lexer.h ---- a/mlir/examples/toy/Ch7/include/toy/Lexer.h -+++ b/mlir/examples/toy/Ch7/include/toy/Lexer.h -@@ -15,6 +15,7 @@ +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct_n.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct_n.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct_n.pass.cpp +@@ -75,5 +75,17 @@ + } + #endif // TEST_HAS_NO_EXCEPTIONS - #include "llvm/ADT/StringRef.h" ++ // Works with const iterators. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ ++ std::ranges::uninitialized_default_construct_n(buf.cbegin(), N); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ + return 0; + } +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct.pass.cpp +@@ -163,5 +163,30 @@ + } + #endif // TEST_HAS_NO_EXCEPTIONS -+#include - #include - #include ++ // Works with const iterators, (iter, sentinel) overload. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ ++ std::ranges::uninitialized_default_construct(buf.cbegin(), buf.cend()); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ ++ // Works with const iterators, (range) overload. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ auto range = std::ranges::subrange(buf.cbegin(), buf.cend()); ++ ++ std::ranges::uninitialized_default_construct(range); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ + return 0; + } +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct_n.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct_n.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct_n.pass.cpp +@@ -94,5 +94,17 @@ + } + #endif // TEST_HAS_NO_EXCEPTIONS ++ // Works with const iterators. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ ++ std::ranges::uninitialized_value_construct_n(buf.cbegin(), N); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ + return 0; + } +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct.pass.cpp +@@ -183,5 +183,30 @@ + } + #endif // TEST_HAS_NO_EXCEPTIONS + ++ // Works with const iterators, (iter, sentinel) overload. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ ++ std::ranges::uninitialized_value_construct(buf.cbegin(), buf.cend()); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ ++ // Works with const iterators, (range) overload. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ ++ auto range = std::ranges::subrange(buf.cbegin(), buf.cend()); ++ std::ranges::uninitialized_value_construct(range); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ + return 0; + } +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp +@@ -104,6 +104,22 @@ + + #endif // TEST_HAS_NO_EXCEPTIONS + ++ // Works with const iterators. ++ { ++ constexpr int N = 5; ++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; ++ Buffer out; ++ Counted::reset(); ++ ++ std::ranges::uninitialized_copy_n(in, N, out.cbegin(), out.cend()); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::equal(in, in + N, out.begin(), out.end())); ++ ++ std::destroy(out.begin(), out.end()); ++ } ++ Counted::reset(); ++ + // Conversions. + { + constexpr int N = 3; +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp +@@ -278,6 +278,39 @@ + Counted::reset(); + #endif // TEST_HAS_NO_EXCEPTIONS + ++ // Works with const iterators, (iter, sentinel) overload. ++ { ++ constexpr int N = 5; ++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; ++ Buffer out; ++ Counted::reset(); ++ ++ std::ranges::uninitialized_copy(in, in + N, out.cbegin(), out.cend()); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::equal(in, in + N, out.begin(), out.end())); ++ ++ std::destroy(out.begin(), out.end()); ++ } ++ Counted::reset(); ++ ++ // Works with const iterators, (range) overload. ++ { ++ constexpr int N = 5; ++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; ++ Buffer out; ++ Counted::reset(); ++ ++ std::ranges::subrange out_range(out.cbegin(), out.cend()); ++ std::ranges::uninitialized_copy(in, out_range); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::equal(in, in + N, out.begin(), out.end())); ++ ++ std::destroy(out.begin(), out.end()); ++ } ++ Counted::reset(); ++ + // Conversions, (iter, sentinel) overload. + { + constexpr int N = 3; +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill/ranges_uninitialized_fill.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill/ranges_uninitialized_fill.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill/ranges_uninitialized_fill.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill/ranges_uninitialized_fill.pass.cpp +@@ -198,5 +198,34 @@ + } + #endif // TEST_HAS_NO_EXCEPTIONS + ++ // Works with const iterators, (iter, sentinel) overload. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ ++ std::ranges::uninitialized_fill(buf.cbegin(), buf.cend(), x); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::all_of(buf.begin(), buf.end(), pred)); ++ ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ ++ // Works with const iterators, (range) overload. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ ++ auto range = std::ranges::subrange(buf.cbegin(), buf.cend()); ++ std::ranges::uninitialized_fill(range, x); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::all_of(buf.begin(), buf.end(), pred)); ++ ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ + return 0; + } +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill.n/ranges_uninitialized_fill_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill.n/ranges_uninitialized_fill_n.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill.n/ranges_uninitialized_fill_n.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill.n/ranges_uninitialized_fill_n.pass.cpp +@@ -101,5 +101,19 @@ + } + #endif // TEST_HAS_NO_EXCEPTIONS + ++ // Works with const iterators. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ ++ std::ranges::uninitialized_fill_n(buf.cbegin(), N, x); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::all_of(buf.begin(), buf.end(), pred)); ++ ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ + return 0; + } +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp +@@ -105,6 +105,22 @@ + + #endif // TEST_HAS_NO_EXCEPTIONS + ++ // Works with const iterators. ++ { ++ constexpr int N = 5; ++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; ++ Buffer out; ++ Counted::reset(); ++ ++ std::ranges::uninitialized_move_n(in, N, out.cbegin(), out.cend()); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::equal(in, in + N, out.begin(), out.end())); ++ ++ std::destroy(out.begin(), out.end()); ++ } ++ Counted::reset(); ++ + // Conversions. + { + constexpr int N = 3; +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp +@@ -282,6 +282,39 @@ + Counted::reset(); + #endif // TEST_HAS_NO_EXCEPTIONS + ++ // Works with const iterators, (iter, sentinel) overload. ++ { ++ constexpr int N = 5; ++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; ++ Buffer out; ++ Counted::reset(); ++ ++ std::ranges::uninitialized_move(in, in + N, out.cbegin(), out.cend()); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::equal(in, in + N, out.begin(), out.end())); ++ ++ std::destroy(out.begin(), out.end()); ++ } ++ Counted::reset(); ++ ++ // Works with const iterators, (range) overload. ++ { ++ constexpr int N = 5; ++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; ++ Buffer out; ++ Counted::reset(); ++ ++ std::ranges::subrange out_range (out.cbegin(), out.cend()); ++ std::ranges::uninitialized_move(in, out_range); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::equal(in, in + N, out.begin(), out.end())); ++ ++ std::destroy(out.begin(), out.end()); ++ } ++ Counted::reset(); ++ + // Conversions, (iter, sentinel) overload. + { + constexpr int N = 3; +diff -ruN --strip-trailing-cr a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp ++++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +@@ -10287,10 +10287,8 @@ + SDValue LeftOp = ShiftOperand.getOperand(0); + SDValue RightOp = ShiftOperand.getOperand(1); + +- // Treat zext nneg as sext - we might need to support handling these as zext +- // as well in the future, but for now just prefer sext. +- bool IsSignExt = sd_match(LeftOp, m_SExtLike(m_Value())); +- bool IsZeroExt = sd_match(LeftOp, m_ZExt(m_Value())); ++ bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND; ++ bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND; + + if (!IsSignExt && !IsZeroExt) + return SDValue(); +diff -ruN --strip-trailing-cr a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp ++++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +@@ -9181,12 +9181,13 @@ + for (unsigned Cnt : Slices) { + ArrayRef Slice = VL.slice(Cnt, VF); + // If any instruction is vectorized already - do not try again. +- if (const TreeEntry *SE = getTreeEntry(Slice.front()); ++ if (TreeEntry *SE = getTreeEntry(Slice.front()); + SE || getTreeEntry(Slice.back())) { + if (!SE) + continue; + if (VF != SE->getVectorFactor() || !SE->isSame(Slice)) + continue; ++ SE->UserTreeIndices.emplace_back(&E, UINT_MAX); + AddCombinedNode(SE->Idx, Cnt); + continue; + } +@@ -13396,7 +13397,12 @@ + if (CommonMask[Idx] != PoisonMaskElem) + CommonMask[Idx] = Idx; + for (auto [E, Idx] : SubVectors) { +- Value *V = castToScalarTyElem(E->VectorizedValue); ++ Value *V = E->VectorizedValue; ++ if (V->getType()->isIntOrIntVectorTy()) ++ V = castToScalarTyElem(V, any_of(E->Scalars, [&](Value *V) { ++ return !isKnownNonNegative( ++ V, SimplifyQuery(*R.DL)); ++ })); + Vec = Builder.CreateInsertVector(Vec->getType(), Vec, V, + Builder.getInt64(Idx)); + if (!CommonMask.empty()) { +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/X86/pmulh.ll b/llvm/test/CodeGen/X86/pmulh.ll +--- a/llvm/test/CodeGen/X86/pmulh.ll ++++ b/llvm/test/CodeGen/X86/pmulh.ll +@@ -953,15 +953,39 @@ + ; SSE-NEXT: movdqa %xmm0, 16(%rdi) + ; SSE-NEXT: retq + ; +-; AVX-LABEL: PR109790: +-; AVX: # %bb.0: +-; AVX-NEXT: movq %rdi, %rax +-; AVX-NEXT: vmovdqa (%rsi), %ymm0 +-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +-; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536] +-; AVX-NEXT: vmovdqa %ymm0, (%rdi) +-; AVX-NEXT: vzeroupper +-; AVX-NEXT: retq ++; AVX2-LABEL: PR109790: ++; AVX2: # %bb.0: ++; AVX2-NEXT: movq %rdi, %rax ++; AVX2-NEXT: vmovdqa (%rsi), %ymm0 ++; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ++; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536] ++; AVX2-NEXT: vmovdqa %ymm0, (%rdi) ++; AVX2-NEXT: vzeroupper ++; AVX2-NEXT: retq ++; ++; AVX512F-LABEL: PR109790: ++; AVX512F: # %bb.0: ++; AVX512F-NEXT: movq %rdi, %rax ++; AVX512F-NEXT: vmovdqa (%rsi), %ymm0 ++; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ++; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ++; AVX512F-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 ++; AVX512F-NEXT: vpsrld $16, %zmm0, %zmm0 ++; AVX512F-NEXT: vpmovdw %zmm0, (%rdi) ++; AVX512F-NEXT: vzeroupper ++; AVX512F-NEXT: retq ++; ++; AVX512BW-LABEL: PR109790: ++; AVX512BW: # %bb.0: ++; AVX512BW-NEXT: movq %rdi, %rax ++; AVX512BW-NEXT: vmovdqa (%rsi), %ymm0 ++; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ++; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ++; AVX512BW-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0] ++; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm0 ++; AVX512BW-NEXT: vpmovdw %zmm0, (%rdi) ++; AVX512BW-NEXT: vzeroupper ++; AVX512BW-NEXT: retq + %load = load <16 x i16>, ptr %a, align 32 + %and = and <16 x i16> %load, + %ext = zext nneg <16 x i16> %and to <16 x i32> +diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll b/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll +--- a/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll ++++ b/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll +@@ -0,0 +1,97 @@ ++; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ++; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s ++ ++define i1 @test(i64 %v1, ptr %v2, i32 %v3, i1 %v4) { ++; CHECK-LABEL: define i1 @test( ++; CHECK-SAME: i64 [[V1:%.*]], ptr [[V2:%.*]], i32 [[V3:%.*]], i1 [[V4:%.*]]) { ++; CHECK-NEXT: [[NEWFUNCROOT:.*:]] ++; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0 ++; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <2 x i32> zeroinitializer ++; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], ++; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i8> ++; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i8> [[TMP3]], ++; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i8> [[TMP4]] to <2 x i32> ++; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i32> [[TMP5]], zeroinitializer ++; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[V3]], i32 0 ++; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> ++; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i8> [[TMP4]] to <2 x i32> ++; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP8]], <2 x i32> [[TMP9]], i64 0) ++; CHECK-NEXT: [[TMP11:%.*]] = uitofp <4 x i32> [[TMP10]] to <4 x float> ++; CHECK-NEXT: [[TMP12:%.*]] = fdiv <4 x float> zeroinitializer, [[TMP11]] ++; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i1> poison, i1 [[V4]], i32 0 ++; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i1> [[TMP13]], <4 x i1> poison, <4 x i32> ++; CHECK-NEXT: [[TMP15:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP14]], <2 x i1> [[TMP6]], i64 0) ++; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP15]], <4 x float> zeroinitializer, <4 x float> [[TMP12]] ++; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP16]], i32 3 ++; CHECK-NEXT: [[CONV_I_I1743_3:%.*]] = fptoui float [[TMP17]] to i32 ++; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i32 [[CONV_I_I1743_3]], 0 ++; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x float> [[TMP16]] to <4 x i32> ++; CHECK-NEXT: [[TMP20:%.*]] = icmp ult <4 x i32> [[TMP19]], ++; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP20]], i32 3 ++; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP21]], i1 [[TMP18]], i1 false ++; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP16]], i32 2 ++; CHECK-NEXT: [[CONV_I_I1743_2:%.*]] = fptoui float [[TMP22]] to i32 ++; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP20]], i32 2 ++; CHECK-NEXT: [[NARROW1:%.*]] = select i1 [[TMP23]], i32 [[CONV_I_I1743_2]], i32 0 ++; CHECK-NEXT: [[TMP24:%.*]] = zext i1 [[NARROW]] to i32 ++; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[NARROW1]], [[TMP24]] ++; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x float> [[TMP16]], i32 1 ++; CHECK-NEXT: [[CONV_I_I1743_1:%.*]] = fptoui float [[TMP26]] to i32 ++; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP20]], i32 1 ++; CHECK-NEXT: [[NARROW2:%.*]] = select i1 [[TMP27]], i32 [[CONV_I_I1743_1]], i32 0 ++; CHECK-NEXT: [[RV3:%.*]] = or i32 [[TMP25]], [[NARROW2]] ++; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x float> [[TMP16]], i32 0 ++; CHECK-NEXT: [[CONV_I_I1743:%.*]] = fptoui float [[TMP28]] to i32 ++; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0 ++; CHECK-NEXT: [[NARROW4:%.*]] = select i1 [[TMP29]], i32 [[CONV_I_I1743]], i32 0 ++; CHECK-NEXT: [[RT5:%.*]] = or i32 [[RV3]], [[NARROW4]] ++; CHECK-NEXT: [[RT:%.*]] = zext i32 [[RT5]] to i64 ++; CHECK-NEXT: store i64 [[RT]], ptr [[V2]], align 1 ++; CHECK-NEXT: ret i1 false ++; ++newFuncRoot: ++ %conv.i147.i1756.3 = uitofp i32 %v3 to float ++ %div.i.i.i1749.3 = fdiv float 0.000000e+00, %conv.i147.i1756.3 ++ %cond.i.i.i1751.3 = select i1 %v4, float 0.000000e+00, float %div.i.i.i1749.3 ++ %conv.i147.i1756.2 = uitofp i32 %v3 to float ++ %div.i.i.i1749.2 = fdiv float 0.000000e+00, %conv.i147.i1756.2 ++ %cond.i.i.i1751.2 = select i1 %v4, float 0.000000e+00, float %div.i.i.i1749.2 ++ %0 = lshr i64 %v1, 40 ++ %1 = trunc i64 %0 to i32 ++ %tt2 = and i32 %1, 255 ++ %cmp1.i.i.i1746.1 = icmp eq i32 %tt2, 0 ++ %conv.i147.i1756.1 = uitofp i32 %tt2 to float ++ %div.i.i.i1749.1 = fdiv float 0.000000e+00, %conv.i147.i1756.1 ++ %cond.i.i.i1751.1 = select i1 %cmp1.i.i.i1746.1, float 0.000000e+00, float %div.i.i.i1749.1 ++ %tt3 = lshr i64 %v1, 32 ++ %2 = trunc i64 %tt3 to i32 ++ %tt1 = and i32 %2, 1 ++ %cmp1.i.i.i1746 = icmp eq i32 %tt1, 0 ++ %conv.i147.i1756 = uitofp i32 %tt1 to float ++ %div.i.i.i1749 = fdiv float 0.000000e+00, %conv.i147.i1756 ++ %cond.i.i.i1751 = select i1 %cmp1.i.i.i1746, float 0.000000e+00, float %div.i.i.i1749 ++ %3 = bitcast float %cond.i.i.i1751.3 to i32 ++ %cmp.i99.i1736.3 = icmp ult i32 %3, 1333788672 ++ %conv.i.i1743.3 = fptoui float %cond.i.i.i1751.3 to i32 ++ %4 = icmp ne i32 %conv.i.i1743.3, 0 ++ %narrow = select i1 %cmp.i99.i1736.3, i1 %4, i1 false ++ %5 = bitcast float %cond.i.i.i1751.2 to i32 ++ %cmp.i99.i1736.2 = icmp ult i32 %5, 1333788672 ++ %conv.i.i1743.2 = fptoui float %cond.i.i.i1751.2 to i32 ++ %narrow1 = select i1 %cmp.i99.i1736.2, i32 %conv.i.i1743.2, i32 0 ++ %6 = zext i1 %narrow to i32 ++ %7 = or i32 %narrow1, %6 ++ %8 = bitcast float %cond.i.i.i1751.1 to i32 ++ %cmp.i99.i1736.1 = icmp ult i32 %8, 1333788672 ++ %conv.i.i1743.1 = fptoui float %cond.i.i.i1751.1 to i32 ++ %narrow2 = select i1 %cmp.i99.i1736.1, i32 %conv.i.i1743.1, i32 0 ++ %rv3 = or i32 %7, %narrow2 ++ %9 = bitcast float %cond.i.i.i1751 to i32 ++ %cmp.i99.i1736 = icmp ult i32 %9, 1333788672 ++ %conv.i.i1743 = fptoui float %cond.i.i.i1751 to i32 ++ %narrow4 = select i1 %cmp.i99.i1736, i32 %conv.i.i1743, i32 0 ++ %rt5 = or i32 %rv3, %narrow4 ++ %rt = zext i32 %rt5 to i64 ++ store i64 %rt, ptr %v2, align 1 ++ ret i1 false ++} +diff -ruN --strip-trailing-cr a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +--- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn ++++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +@@ -280,6 +280,7 @@ + "rdpruintrin.h", + "rdseedintrin.h", + "riscv_bitmanip.h", ++ "riscv_corev_alu.h", + "riscv_crypto.h", + "riscv_ntlh.h", + "rtmintrin.h", +diff -ruN --strip-trailing-cr a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn ++++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +@@ -632,6 +632,7 @@ + "__memory/unique_temporary_buffer.h", + "__memory/uses_allocator.h", + "__memory/uses_allocator_construction.h", ++ "__memory/voidify.h", + "__memory_resource/memory_resource.h", + "__memory_resource/monotonic_buffer_resource.h", + "__memory_resource/polymorphic_allocator.h", +diff -ruN --strip-trailing-cr a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel ++++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +@@ -358,6 +358,7 @@ + td_file = "include/clang/Basic/BuiltinsRISCV.td", + td_srcs = [ + "include/clang/Basic/BuiltinsRISCV.td", ++ "include/clang/Basic/BuiltinsRISCVXCV.td", + "include/clang/Basic/BuiltinsBase.td", + ], + ) diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl index ad9923cfc2b03..80f07d34d031d 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "6292f117c39b9fc72da4e40328eeeda2aa94a5f2" - LLVM_SHA256 = "31f583de2e077f9289fc5efea74bd6e1a1694fda5f77f09472253cdc072f2e5e" + LLVM_COMMIT = "00128a20eec27246719d73ba427bf821883b00b4" + LLVM_SHA256 = "9fff2ccb6c262f3d5e2f98c281a0b99a585daee83742e1599709ff61cfc222af" tf_http_archive( name = name, diff --git a/third_party/shardy/temporary.patch b/third_party/shardy/temporary.patch index 03724d341296d..89dd4942ccafc 100644 --- a/third_party/shardy/temporary.patch +++ b/third_party/shardy/temporary.patch @@ -1143,10 +1143,10 @@ index a9705ce..1e2ad3d 100644 mlir::func::registerAllExtensions(dialects); diff --git a/third_party/llvm/generated.patch b/third_party/llvm/generated.patch -index de92cb4..1bea535 100644 +index de92cb4..155d3f2 100644 --- a/third_party/llvm/generated.patch +++ b/third_party/llvm/generated.patch -@@ -1,4095 +1,78 @@ +@@ -1,4095 +1,901 @@ Auto generated patch. Do not edit or delete it, even if empty. -diff -ruN --strip-trailing-cr a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst ---- a/llvm/docs/NVPTXUsage.rst @@ -1154,11 +1154,7 @@ index de92cb4..1bea535 100644 -@@ -127,6 +127,69 @@ - NVPTX Intrinsics - ================ -+diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch1/include/toy/Lexer.h b/mlir/examples/toy/Ch1/include/toy/Lexer.h -+--- a/mlir/examples/toy/Ch1/include/toy/Lexer.h -++++ b/mlir/examples/toy/Ch1/include/toy/Lexer.h -+@@ -15,6 +15,7 @@ - +- -+Address Space Conversion -+------------------------ -+ @@ -1224,18 +1220,14 @@ index de92cb4..1bea535 100644 -+ - Reading PTX Special Registers - ----------------------------- -+ #include "llvm/ADT/StringRef.h" - +- -diff -ruN --strip-trailing-cr a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst ---- a/llvm/docs/ReleaseNotes.rst -+++ b/llvm/docs/ReleaseNotes.rst -@@ -63,24 +63,6 @@ - * ``llvm.nvvm.bitcast.d2ll`` - * ``llvm.nvvm.bitcast.ll2d`` -++#include -+ #include -+ #include - +- --* Remove the following intrinsics which can be replaced with a funnel-shift: -- -- * ``llvm.nvvm.rotate.b32`` @@ -1256,11 +1248,7 @@ index de92cb4..1bea535 100644 -- - Changes to LLVM infrastructure - ------------------------------ -+diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch2/include/toy/Lexer.h b/mlir/examples/toy/Ch2/include/toy/Lexer.h -+--- a/mlir/examples/toy/Ch2/include/toy/Lexer.h -++++ b/mlir/examples/toy/Ch2/include/toy/Lexer.h -+@@ -15,6 +15,7 @@ - +- -diff -ruN --strip-trailing-cr a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td ---- a/llvm/include/llvm/IR/IntrinsicsNVVM.td -+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -1284,17 +1272,13 @@ index de92cb4..1bea535 100644 -+// * llvm.nvvm.bitcast.i2f --> ibid. -+// * llvm.nvvm.bitcast.d2ll --> ibid. -+// * llvm.nvvm.bitcast.ll2d --> ibid. -+ #include "llvm/ADT/StringRef.h" - +- - def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr - def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr -@@ -1610,6 +1602,40 @@ - [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>], - "llvm.nvvm.ldg.global.p">; -++#include -+ #include -+ #include - +- -+// Use for generic pointers -+// - These intrinsics are used to convert address spaces. -+// - The input pointer and output pointer must have the same type, except for @@ -1335,11 +1319,7 @@ index de92cb4..1bea535 100644 -@@ -4453,6 +4479,22 @@ - "llvm.nvvm.sust.p.3d.v4i32.trap">, - ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">; -+diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch3/include/toy/Lexer.h b/mlir/examples/toy/Ch3/include/toy/Lexer.h -+--- a/mlir/examples/toy/Ch3/include/toy/Lexer.h -++++ b/mlir/examples/toy/Ch3/include/toy/Lexer.h -+@@ -15,6 +15,7 @@ - +- -+ -+def int_nvvm_rotate_b32 -+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], @@ -1381,14 +1361,72 @@ index de92cb4..1bea535 100644 -- Name.starts_with(".to.gen"); - else - Expand = false; -+ #include "llvm/ADT/StringRef.h" ++diff -ruN --strip-trailing-cr a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp ++--- a/clang/lib/CodeGen/CGDeclCXX.cpp +++++ b/clang/lib/CodeGen/CGDeclCXX.cpp ++@@ -640,13 +640,13 @@ ++ addUsedGlobal(COMDATKey); ++ } -@@ -2271,117 +2258,6 @@ - } -- } -++#include -+ #include -+ #include ++- // If comdats are in use and supported, place the initializer function into ++- // the comdat group of the global. In the MS ABI, initializers are mangled ++- // and have their own comdat, so we don't include them in the group for ++- // consistency with MSVC. +++ // If we used a COMDAT key for the global ctor, the init function can be +++ // discarded if the global ctor entry is discarded. +++ // FIXME: Do we need to restrict this to ELF and Wasm? ++ llvm::Comdat *C = Addr->getComdat(); ++- if (COMDATKey && C && getTriple().supportsCOMDAT() && ++- !getTarget().getCXXABI().isMicrosoft()) { +++ if (COMDATKey && C && +++ (getTarget().getTriple().isOSBinFormatELF() || +++ getTarget().getTriple().isOSBinFormatWasm())) { ++ Fn->setComdat(C); ++ } ++ } else { ++diff -ruN --strip-trailing-cr a/libcxx/docs/Status/Cxx23Issues.csv b/libcxx/docs/Status/Cxx23Issues.csv ++--- a/libcxx/docs/Status/Cxx23Issues.csv +++++ b/libcxx/docs/Status/Cxx23Issues.csv ++@@ -296,7 +296,7 @@ ++ "`LWG3862 `__","``basic_const_iterator``'s ``common_type`` specialization is underconstrained","2023-02 (Issaquah)","","","" ++ "`LWG3865 `__","Sorting a range of ``pairs``","2023-02 (Issaquah)","|Complete|","17.0","" ++ "`LWG3869 `__","Deprecate ``std::errc`` constants related to UNIX STREAMS","2023-02 (Issaquah)","|Complete|","19.0","" ++-"`LWG3870 `__","Remove ``voidify``","2023-02 (Issaquah)","|Complete|","20.0","" +++"`LWG3870 `__","Remove ``voidify``","2023-02 (Issaquah)","","","" ++ "`LWG3871 `__","Adjust note about ``terminate``","2023-02 (Issaquah)","","","" ++ "`LWG3872 `__","``basic_const_iterator`` should have custom ``iter_move``","2023-02 (Issaquah)","","","" ++ "`LWG3875 `__","``std::ranges::repeat_view::iterator`` may be ill-formed","2023-02 (Issaquah)","|Complete|","17.0","" ++diff -ruN --strip-trailing-cr a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt ++--- a/libcxx/include/CMakeLists.txt +++++ b/libcxx/include/CMakeLists.txt ++@@ -560,6 +560,7 @@ ++ __memory/unique_temporary_buffer.h ++ __memory/uses_allocator.h ++ __memory/uses_allocator_construction.h +++ __memory/voidify.h ++ __memory_resource/memory_resource.h ++ __memory_resource/monotonic_buffer_resource.h ++ __memory_resource/polymorphic_allocator.h ++diff -ruN --strip-trailing-cr a/libcxx/include/__memory/construct_at.h b/libcxx/include/__memory/construct_at.h ++--- a/libcxx/include/__memory/construct_at.h +++++ b/libcxx/include/__memory/construct_at.h ++@@ -14,6 +14,7 @@ ++ #include <__config> ++ #include <__iterator/access.h> ++ #include <__memory/addressof.h> +++#include <__memory/voidify.h> ++ #include <__type_traits/enable_if.h> ++ #include <__type_traits/is_array.h> ++ #include <__utility/declval.h> ++@@ -37,7 +38,7 @@ ++ template ()) _Tp(std::declval<_Args>()...))> ++ _LIBCPP_HIDE_FROM_ABI constexpr _Tp* construct_at(_Tp* __location, _Args&&... __args) { ++ _LIBCPP_ASSERT_NON_NULL(__location != nullptr, "null pointer given to construct_at"); ++- return ::new (static_cast(__location)) _Tp(std::forward<_Args>(__args)...); +++ return ::new (std::__voidify(*__location)) _Tp(std::forward<_Args>(__args)...); + } --static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, -- Function *F, IRBuilder<> &Builder) { @@ -1505,11 +1543,7 @@ index de92cb4..1bea535 100644 - IRBuilder<> &Builder) { - LLVMContext &C = F->getContext(); -@@ -4332,8 +4208,85 @@ -+diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch4/include/toy/Lexer.h b/mlir/examples/toy/Ch4/include/toy/Lexer.h -+--- a/mlir/examples/toy/Ch4/include/toy/Lexer.h -++++ b/mlir/examples/toy/Ch4/include/toy/Lexer.h -+@@ -15,6 +15,7 @@ - +- - if (!IsX86 && Name == "stackprotectorcheck") { - Rep = nullptr; -+ } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) { @@ -1604,30 +1638,58 @@ index de92cb4..1bea535 100644 - static const LLT S512 = LLT::scalar(512); --static const LLT S1024 = LLT::scalar(1024); - static const LLT MaxScalar = LLT::scalar(MaxRegisterSize); -+ #include "llvm/ADT/StringRef.h" ++ #endif ++@@ -48,7 +49,7 @@ ++ return std::construct_at(__location, std::forward<_Args>(__args)...); ++ #else ++ return _LIBCPP_ASSERT_NON_NULL(__location != nullptr, "null pointer given to construct_at"), ++- ::new (static_cast(__location)) _Tp(std::forward<_Args>(__args)...); +++ ::new (std::__voidify(*__location)) _Tp(std::forward<_Args>(__args)...); ++ #endif ++ } - static const LLT V2S8 = LLT::fixed_vector(2, 8); -@@ -333,8 +332,8 @@ - static const LLT V2S128 = LLT::fixed_vector(2, 128); - static const LLT V4S128 = LLT::fixed_vector(4, 128); -++#include -+ #include -+ #include ++diff -ruN --strip-trailing-cr a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h ++--- a/libcxx/include/__memory/shared_ptr.h +++++ b/libcxx/include/__memory/shared_ptr.h ++@@ -248,35 +248,33 @@ ++ ++ template ++ struct __shared_ptr_emplace : __shared_weak_count { ++- using __value_type = __remove_cv_t<_Tp>; ++- ++ template ::value, int> = 0> ++ _LIBCPP_HIDE_FROM_ABI explicit __shared_ptr_emplace(_Alloc __a, _Args&&...) : __storage_(std::move(__a)) { ++ static_assert( ++ sizeof...(_Args) == 0, "No argument should be provided to the control block when using _for_overwrite"); ++- ::new (static_cast(__get_elem())) __value_type; +++ ::new ((void*)__get_elem()) _Tp; ++ } --static std::initializer_list AllScalarTypes = { -- S32, S64, S96, S128, S160, S224, S256, S512, S1024}; -+static std::initializer_list AllScalarTypes = {S32, S64, S96, S128, -+ S160, S224, S256, S512}; -+diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch5/include/toy/Lexer.h b/mlir/examples/toy/Ch5/include/toy/Lexer.h -+--- a/mlir/examples/toy/Ch5/include/toy/Lexer.h -++++ b/mlir/examples/toy/Ch5/include/toy/Lexer.h -+@@ -15,6 +15,7 @@ ++ template ::value, int> = 0> ++ _LIBCPP_HIDE_FROM_ABI explicit __shared_ptr_emplace(_Alloc __a, _Args&&... __args) : __storage_(std::move(__a)) { ++- using _TpAlloc = typename __allocator_traits_rebind<_Alloc, __value_type>::type; +++ using _TpAlloc = typename __allocator_traits_rebind<_Alloc, __remove_cv_t<_Tp> >::type; ++ _TpAlloc __tmp(*__get_alloc()); ++ allocator_traits<_TpAlloc>::construct(__tmp, __get_elem(), std::forward<_Args>(__args)...); ++ } - static std::initializer_list AllS16Vectors{ - V2S16, V4S16, V6S16, V8S16, V10S16, V12S16, V16S16, V2S128, V4S128}; -@@ -890,11 +889,10 @@ - .clampScalar(0, S16, S64); -+ #include "llvm/ADT/StringRef.h" ++ _LIBCPP_HIDE_FROM_ABI _Alloc* __get_alloc() _NOEXCEPT { return __storage_.__get_alloc(); } - getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE}) -- .legalIf(isRegisterClassType(0)) @@ -1645,9 +1707,8 @@ index de92cb4..1bea535 100644 -@@ -174,6 +174,10 @@ - def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70" - "&& Subtarget->getPTXVersion() >= 64)">; -++#include -+ #include -+ #include ++- _LIBCPP_HIDE_FROM_ABI __value_type* __get_elem() _NOEXCEPT { return __storage_.__get_elem(); } +++ _LIBCPP_HIDE_FROM_ABI _Tp* __get_elem() _NOEXCEPT { return __storage_.__get_elem(); } -+def useShortPtrLocal : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_LOCAL) == 32">; -+def useShortPtrShared : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_SHARED) == 32">; @@ -1655,20 +1716,151 @@ index de92cb4..1bea535 100644 -+ - def useFP16Math: Predicate<"Subtarget->allowFP16Math()">; - def hasBF16Math: Predicate<"Subtarget->hasBF16Math()">; -+diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch6/include/toy/Lexer.h b/mlir/examples/toy/Ch6/include/toy/Lexer.h -+--- a/mlir/examples/toy/Ch6/include/toy/Lexer.h -++++ b/mlir/examples/toy/Ch6/include/toy/Lexer.h -+@@ -15,6 +15,7 @@ ++ private: ++ template ::value, int> = 0> ++ _LIBCPP_HIDE_FROM_ABI void __on_zero_shared_impl() _NOEXCEPT { ++- __get_elem()->~__value_type(); +++ __get_elem()->~_Tp(); ++ } -@@ -1661,6 +1665,167 @@ - "brev.b64 \t$dst, $a;", - [(set Int64Regs:$dst, (bitreverse Int64Regs:$a))]>; -+ #include "llvm/ADT/StringRef.h" ++ template (__buffer_)->__alloc_); ++ } --+// ++- _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_CFI __value_type* __get_elem() _NOEXCEPT { +++ _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_CFI _Tp* __get_elem() _NOEXCEPT { ++ return std::addressof(reinterpret_cast<_Data*>(__buffer_)->__elem_); ++ } ++ }; ++diff -ruN --strip-trailing-cr a/libcxx/include/__memory/uninitialized_algorithms.h b/libcxx/include/__memory/uninitialized_algorithms.h ++--- a/libcxx/include/__memory/uninitialized_algorithms.h +++++ b/libcxx/include/__memory/uninitialized_algorithms.h ++@@ -21,6 +21,7 @@ ++ #include <__memory/allocator_traits.h> ++ #include <__memory/construct_at.h> ++ #include <__memory/pointer_traits.h> +++#include <__memory/voidify.h> ++ #include <__type_traits/enable_if.h> ++ #include <__type_traits/extent.h> ++ #include <__type_traits/is_array.h> ++@@ -63,7 +64,7 @@ ++ try { ++ #endif ++ for (; __ifirst != __ilast && !__stop_copying(__idx); ++__ifirst, (void)++__idx) ++- ::new (static_cast(std::addressof(*__idx))) _ValueType(*__ifirst); +++ ::new (std::__voidify(*__idx)) _ValueType(*__ifirst); ++ #ifndef _LIBCPP_HAS_NO_EXCEPTIONS ++ } catch (...) { ++ std::__destroy(__ofirst, __idx); ++@@ -93,7 +94,7 @@ ++ try { ++ #endif ++ for (; __n > 0 && !__stop_copying(__idx); ++__ifirst, (void)++__idx, (void)--__n) ++- ::new (static_cast(std::addressof(*__idx))) _ValueType(*__ifirst); +++ ::new (std::__voidify(*__idx)) _ValueType(*__ifirst); ++ #ifndef _LIBCPP_HAS_NO_EXCEPTIONS ++ } catch (...) { ++ std::__destroy(__ofirst, __idx); ++@@ -123,7 +124,7 @@ ++ try { ++ #endif ++ for (; __idx != __last; ++__idx) ++- ::new (static_cast(std::addressof(*__idx))) _ValueType(__x); +++ ::new (std::__voidify(*__idx)) _ValueType(__x); ++ #ifndef _LIBCPP_HAS_NO_EXCEPTIONS ++ } catch (...) { ++ std::__destroy(__first, __idx); ++@@ -151,7 +152,7 @@ ++ try { ++ #endif ++ for (; __n > 0; ++__idx, (void)--__n) ++- ::new (static_cast(std::addressof(*__idx))) _ValueType(__x); +++ ::new (std::__voidify(*__idx)) _ValueType(__x); ++ #ifndef _LIBCPP_HAS_NO_EXCEPTIONS ++ } catch (...) { ++ std::__destroy(__first, __idx); ++@@ -181,7 +182,7 @@ ++ try { ++ # endif ++ for (; __idx != __last; ++__idx) ++- ::new (static_cast(std::addressof(*__idx))) _ValueType; +++ ::new (std::__voidify(*__idx)) _ValueType; ++ # ifndef _LIBCPP_HAS_NO_EXCEPTIONS ++ } catch (...) { ++ std::__destroy(__first, __idx); ++@@ -207,7 +208,7 @@ ++ try { ++ # endif ++ for (; __n > 0; ++__idx, (void)--__n) ++- ::new (static_cast(std::addressof(*__idx))) _ValueType; +++ ::new (std::__voidify(*__idx)) _ValueType; ++ # ifndef _LIBCPP_HAS_NO_EXCEPTIONS ++ } catch (...) { ++ std::__destroy(__first, __idx); ++@@ -234,7 +235,7 @@ ++ try { ++ # endif ++ for (; __idx != __last; ++__idx) ++- ::new (static_cast(std::addressof(*__idx))) _ValueType(); +++ ::new (std::__voidify(*__idx)) _ValueType(); ++ # ifndef _LIBCPP_HAS_NO_EXCEPTIONS ++ } catch (...) { ++ std::__destroy(__first, __idx); ++@@ -260,7 +261,7 @@ ++ try { ++ # endif ++ for (; __n > 0; ++__idx, (void)--__n) ++- ::new (static_cast(std::addressof(*__idx))) _ValueType(); +++ ::new (std::__voidify(*__idx)) _ValueType(); ++ # ifndef _LIBCPP_HAS_NO_EXCEPTIONS ++ } catch (...) { ++ std::__destroy(__first, __idx); ++@@ -296,7 +297,7 @@ ++ try { ++ # endif ++ for (; __ifirst != __ilast && !__stop_moving(__idx); ++__idx, (void)++__ifirst) { ++- ::new (static_cast(std::addressof(*__idx))) _ValueType(__iter_move(__ifirst)); +++ ::new (std::__voidify(*__idx)) _ValueType(__iter_move(__ifirst)); ++ } ++ # ifndef _LIBCPP_HAS_NO_EXCEPTIONS ++ } catch (...) { ++@@ -334,7 +335,7 @@ ++ try { ++ # endif ++ for (; __n > 0 && !__stop_moving(__idx); ++__idx, (void)++__ifirst, --__n) ++- ::new (static_cast(std::addressof(*__idx))) _ValueType(__iter_move(__ifirst)); +++ ::new (std::__voidify(*__idx)) _ValueType(__iter_move(__ifirst)); ++ # ifndef _LIBCPP_HAS_NO_EXCEPTIONS ++ } catch (...) { ++ std::__destroy(__ofirst, __idx); ++diff -ruN --strip-trailing-cr a/libcxx/include/__memory/voidify.h b/libcxx/include/__memory/voidify.h ++--- a/libcxx/include/__memory/voidify.h +++++ b/libcxx/include/__memory/voidify.h ++@@ -0,0 +1,30 @@ +++// -*- C++ -*- +++//===----------------------------------------------------------------------===// + +// -+// Rotate: Use ptx shf instruction if available. --+// --+ +++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +++// See https://llvm.org/LICENSE.txt for license information. +++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// +++//===----------------------------------------------------------------------===// + + -+// 32 bit r2 = rotl r1, n -+// => -+// r2 = shf.l r1, r1, n @@ -1774,7 +1966,9 @@ index de92cb4..1bea535 100644 -+ (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>; -+def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)), -+ (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>; --+ +++#ifndef _LIBCPP___MEMORY_VOIDIFY_H +++#define _LIBCPP___MEMORY_VOIDIFY_H + + -+// 64-bit software rotate left by register. -+def ROTL64reg_sw : -+ NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt), @@ -1789,7 +1983,9 @@ index de92cb4..1bea535 100644 -+ "add.u64 \t$dst, %lhs, %rhs;\n\t" -+ "}}", -+ [(set Int64Regs:$dst, (rotl Int64Regs:$src, (i32 Int32Regs:$amt)))]>; --+ +++#include <__config> +++#include <__memory/addressof.h> + + -+def ROTR64reg_sw : -+ NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt), -+ "{{\n\t" @@ -1803,43 +1999,46 @@ index de92cb4..1bea535 100644 -+ "add.u64 \t$dst, %lhs, %rhs;\n\t" -+ "}}", -+ [(set Int64Regs:$dst, (rotr Int64Regs:$src, (i32 Int32Regs:$amt)))]>; --+ +++#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +++# pragma GCC system_header +++#endif + + -+// -+// Funnnel shift in clamp mode -+// --+ +++_LIBCPP_BEGIN_NAMESPACE_STD + + -+// Create SDNodes so they can be used in the DAG code, e.g. -+// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts) -+def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>; -+def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>; --+ +++template +++_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void* __voidify(_Tp& __from) { +++ // Cast away cv-qualifiers to allow modifying elements of a range through const iterators. +++ return const_cast(static_cast(std::addressof(__from))); +++} + + -+def FUNSHFLCLAMP : -+ NVPTXInst<(outs Int32Regs:$dst), -+ (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), -+ "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;", -+ [(set Int32Regs:$dst, -+ (FUN_SHFL_CLAMP (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt)))]>; --+ +++_LIBCPP_END_NAMESPACE_STD + + -+def FUNSHFRCLAMP : -+ NVPTXInst<(outs Int32Regs:$dst), -+ (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), -+ "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;", -+ [(set Int32Regs:$dst, -+ (FUN_SHFR_CLAMP (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt)))]>; -++#include -+ #include -+ #include - +- - // - // BFE - bit-field extract -@@ -3492,42 +3657,6 @@ - def: Pat<(v2i16 (scalar_to_vector (i16 Int16Regs:$a))), - (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; -+diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch7/include/toy/Lexer.h b/mlir/examples/toy/Ch7/include/toy/Lexer.h -+--- a/mlir/examples/toy/Ch7/include/toy/Lexer.h -++++ b/mlir/examples/toy/Ch7/include/toy/Lexer.h -+@@ -15,6 +15,7 @@ - +- --// --// Funnel-Shift --// @@ -1884,12 +2083,8 @@ index de92cb4..1bea535 100644 -+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td -@@ -2537,45 +2537,59 @@ - : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; -+ #include "llvm/ADT/StringRef.h" - -++#include -+ #include -+ #include - +- +- --multiclass NG_TO_G { -+multiclass NG_TO_G { - def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), @@ -1907,7 +2102,55 @@ index de92cb4..1bea535 100644 -+ [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>, -+ Requires<[ShortPtr]>; - } -- +++#endif // _LIBCPP___MEMORY_VOIDIFY_H ++diff -ruN --strip-trailing-cr a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap ++--- a/libcxx/include/module.modulemap +++++ b/libcxx/include/module.modulemap ++@@ -1528,6 +1528,7 @@ ++ } ++ module uses_allocator { header "__memory/uses_allocator.h" } ++ module uses_allocator_construction { header "__memory/uses_allocator_construction.h" } +++ module voidify { header "__memory/voidify.h" } ++ ++ header "memory" ++ export * ++diff -ruN --strip-trailing-cr a/libcxx/include/optional b/libcxx/include/optional ++--- a/libcxx/include/optional +++++ b/libcxx/include/optional ++@@ -287,7 +287,7 @@ ++ static_assert(is_object_v, "instantiation of optional with a non-object type is undefined behavior"); ++ union { ++ char __null_state_; ++- remove_cv_t __val_; +++ value_type __val_; ++ }; ++ bool __engaged_; ++ ++@@ -323,7 +323,7 @@ ++ static_assert(is_object_v, "instantiation of optional with a non-object type is undefined behavior"); ++ union { ++ char __null_state_; ++- remove_cv_t __val_; +++ value_type __val_; ++ }; ++ bool __engaged_; ++ ++@@ -377,7 +377,7 @@ ++ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __assign_from(_That&& __opt) { ++ if (this->__engaged_ == __opt.has_value()) { ++ if (this->__engaged_) ++- static_cast<_Tp&>(this->__val_) = std::forward<_That>(__opt).__get(); +++ this->__val_ = std::forward<_That>(__opt).__get(); ++ } else { ++ if (this->__engaged_) ++ this->reset(); ++diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/construct_at.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/construct_at.pass.cpp ++--- a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/construct_at.pass.cpp +++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/construct_at.pass.cpp ++@@ -80,6 +80,21 @@ ++ a.deallocate(p, 2); ++ } + --multiclass G_TO_NG { -+multiclass G_TO_NG { - def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), @@ -1924,8 +2167,24 @@ index de92cb4..1bea535 100644 -+ #" cvt.u32.u64 \t$result, %tmp; }}", -+ [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>, -+ Requires<[ShortPtr]>; -- } -- +++ { +++ std::allocator a; +++ Counted const* p = a.allocate(2); +++ int count = 0; +++ std::construct_at(p, count); +++ assert(count == 1); +++ std::construct_at(p+1, count); +++ assert(count == 2); +++ (p+1)->~Counted(); +++ assert(count == 1); +++ p->~Counted(); +++ assert(count == 0); +++ a.deallocate(const_cast(p), 2); +++ } +++ ++ return true; + } + --defm cvta_local : NG_TO_G<"local">; --defm cvta_shared : NG_TO_G<"shared">; --defm cvta_global : NG_TO_G<"global">; @@ -1977,8 +2236,18 @@ index de92cb4..1bea535 100644 -@@ -2618,6 +2632,24 @@ - [(set Int64Regs:$r, - (int_nvvm_move_ptr texternalsym:$s))]>;*/ -- --+ ++diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/ranges_construct_at.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/ranges_construct_at.pass.cpp ++--- a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/ranges_construct_at.pass.cpp +++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/ranges_construct_at.pass.cpp ++@@ -99,6 +99,16 @@ ++ alloc.deallocate(out, 2); ++ } + +++ // Works with const pointers. +++ { +++ int x = 1; +++ const int* ptr = &x; + + -+// MoveParam %r1, param -+// ptr_local_to_gen %r2, %r1 -+// ptr_gen_to_local %r3, %r2 @@ -1995,14 +2264,26 @@ index de92cb4..1bea535 100644 -+def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen -+ (MoveParam texternalsym:$src)))), -+ (nvvm_move_ptr32 texternalsym:$src)>; --+ +++ const int* result = std::ranges::construct_at(ptr, 42); +++ assert(result == ptr); +++ assert(x == 42); +++ } + + - def texsurf_handles - : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), - "mov.u64 \t$result, $src;", []>; -@@ -2701,9 +2733,134 @@ - def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; -- -- ++ return true; ++ } + ++diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct_n.pass.cpp ++--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct_n.pass.cpp +++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct_n.pass.cpp ++@@ -75,5 +75,17 @@ ++ } ++ #endif // TEST_HAS_NO_EXCEPTIONS + -+// rotate builtin support -+ -+def ROTATE_B32_HW_IMM @@ -2071,13 +2352,23 @@ index de92cb4..1bea535 100644 -+ (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), -+ "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, -+ Requires<[hasHWROT32]>; --+ +++ // Works with const iterators. +++ { +++ constexpr int N = 5; +++ Buffer buf; + + -+ def SHF_R_WRAP_B32_IMM -+ : NVPTXInst<(outs Int32Regs:$dst), -+ (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), -+ "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, -+ Requires<[hasHWROT32]>; --+ +++ std::ranges::uninitialized_default_construct_n(buf.cbegin(), N); +++ assert(Counted::current_objects == N); +++ assert(Counted::total_objects == N); +++ std::destroy(buf.begin(), buf.end()); +++ Counted::reset(); +++ } + + -+ def SHF_R_WRAP_B32_REG -+ : NVPTXInst<(outs Int32Regs:$dst), -+ (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), @@ -2235,8 +2526,42 @@ index de92cb4..1bea535 100644 -+ ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), -+ Src)); - return; -- } -- } ++ return 0; ++ } ++diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct.pass.cpp ++--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct.pass.cpp +++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct.pass.cpp ++@@ -163,5 +163,30 @@ + } ++ #endif // TEST_HAS_NO_EXCEPTIONS ++ +++ // Works with const iterators, (iter, sentinel) overload. +++ { +++ constexpr int N = 5; +++ Buffer buf; +++ +++ std::ranges::uninitialized_default_construct(buf.cbegin(), buf.cend()); +++ assert(Counted::current_objects == N); +++ assert(Counted::total_objects == N); +++ std::destroy(buf.begin(), buf.end()); +++ Counted::reset(); +++ } +++ +++ // Works with const iterators, (range) overload. +++ { +++ constexpr int N = 5; +++ Buffer buf; +++ auto range = std::ranges::subrange(buf.cbegin(), buf.cend()); +++ +++ std::ranges::uninitialized_default_construct(range); +++ assert(Counted::current_objects == N); +++ assert(Counted::total_objects == N); +++ std::destroy(buf.begin(), buf.end()); +++ Counted::reset(); +++ } +++ ++ return 0; + } -diff -ruN --strip-trailing-cr a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp ---- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp -+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -4302,19 +4627,35 @@ index de92cb4..1bea535 100644 - %1:_(s32) = G_EXTRACT %0, 0 -@@ -297,6 +296,18 @@ - ... -- ++diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct_n.pass.cpp ++--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct_n.pass.cpp +++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct_n.pass.cpp ++@@ -94,5 +94,17 @@ ++ } ++ #endif // TEST_HAS_NO_EXCEPTIONS + - --- -+name: test_implicit_def_v17s32 -+body: | -+ bb.0: --+ +++ // Works with const iterators. +++ { +++ constexpr int N = 5; +++ Buffer buf; + + -+ ; CHECK-LABEL: name: test_implicit_def_v17s32 -+ ; CHECK: [[DEF:%[0-9]+]]:_(<17 x s32>) = G_IMPLICIT_DEF -+ ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<17 x s32>) -+ %0:_(<17 x s32>) = G_IMPLICIT_DEF -+ S_NOP 0, implicit %0 -+... --+ +++ std::ranges::uninitialized_value_construct_n(buf.cbegin(), N); +++ assert(Counted::current_objects == N); +++ assert(Counted::total_objects == N); +++ std::destroy(buf.begin(), buf.end()); +++ Counted::reset(); +++ } + + -+--- - name: test_implicit_def_v32s32 - body: | @@ -4561,13 +4902,121 @@ index de92cb4..1bea535 100644 -+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir -@@ -42,6 +42,8 @@ - ret void -- } ++ return 0; ++ } ++diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct.pass.cpp ++--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct.pass.cpp +++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct.pass.cpp ++@@ -183,5 +183,30 @@ + } - -+ define void @non_power_of_2() { ret void } -+ - define amdgpu_kernel void @load_constant_v4i16_from_8_align8(ptr addrspace(4) %ptr0) { - ret void -- } ++ #endif // TEST_HAS_NO_EXCEPTIONS ++ +++ // Works with const iterators, (iter, sentinel) overload. +++ { +++ constexpr int N = 5; +++ Buffer buf; +++ +++ std::ranges::uninitialized_value_construct(buf.cbegin(), buf.cend()); +++ assert(Counted::current_objects == N); +++ assert(Counted::total_objects == N); +++ std::destroy(buf.begin(), buf.end()); +++ Counted::reset(); +++ } +++ +++ // Works with const iterators, (range) overload. +++ { +++ constexpr int N = 5; +++ Buffer buf; +++ +++ auto range = std::ranges::subrange(buf.cbegin(), buf.cend()); +++ std::ranges::uninitialized_value_construct(range); +++ assert(Counted::current_objects == N); +++ assert(Counted::total_objects == N); +++ std::destroy(buf.begin(), buf.end()); +++ Counted::reset(); +++ } +++ ++ return 0; ++ } ++diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp ++--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp +++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp ++@@ -104,6 +104,22 @@ ++ ++ #endif // TEST_HAS_NO_EXCEPTIONS ++ +++ // Works with const iterators. +++ { +++ constexpr int N = 5; +++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; +++ Buffer out; +++ Counted::reset(); +++ +++ std::ranges::uninitialized_copy_n(in, N, out.cbegin(), out.cend()); +++ assert(Counted::current_objects == N); +++ assert(Counted::total_objects == N); +++ assert(std::equal(in, in + N, out.begin(), out.end())); +++ +++ std::destroy(out.begin(), out.end()); +++ } +++ Counted::reset(); +++ ++ // Conversions. ++ { ++ constexpr int N = 3; ++diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp ++--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp +++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp ++@@ -278,6 +278,39 @@ ++ Counted::reset(); ++ #endif // TEST_HAS_NO_EXCEPTIONS ++ +++ // Works with const iterators, (iter, sentinel) overload. +++ { +++ constexpr int N = 5; +++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; +++ Buffer out; +++ Counted::reset(); +++ +++ std::ranges::uninitialized_copy(in, in + N, out.cbegin(), out.cend()); +++ assert(Counted::current_objects == N); +++ assert(Counted::total_objects == N); +++ assert(std::equal(in, in + N, out.begin(), out.end())); +++ +++ std::destroy(out.begin(), out.end()); +++ } +++ Counted::reset(); +++ +++ // Works with const iterators, (range) overload. +++ { +++ constexpr int N = 5; +++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; +++ Buffer out; +++ Counted::reset(); +++ +++ std::ranges::subrange out_range(out.cbegin(), out.cend()); +++ std::ranges::uninitialized_copy(in, out_range); +++ assert(Counted::current_objects == N); +++ assert(Counted::total_objects == N); +++ assert(std::equal(in, in + N, out.begin(), out.end())); +++ +++ std::destroy(out.begin(), out.end()); +++ } +++ Counted::reset(); +++ ++ // Conversions, (iter, sentinel) overload. ++ { ++ constexpr int N = 3; ++diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill/ranges_uninitialized_fill.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill/ranges_uninitialized_fill.pass.cpp ++--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill/ranges_uninitialized_fill.pass.cpp +++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill/ranges_uninitialized_fill.pass.cpp ++@@ -198,5 +198,34 @@ + } -@@ -185,6 +187,23 @@ - ... - @@ -4591,7 +5040,46 @@ index de92cb4..1bea535 100644 -+--- - name: load_constant_v4i16_from_8_align8 - legalized: true -- ++ #endif // TEST_HAS_NO_EXCEPTIONS ++ +++ // Works with const iterators, (iter, sentinel) overload. +++ { +++ constexpr int N = 5; +++ Buffer buf; +++ +++ std::ranges::uninitialized_fill(buf.cbegin(), buf.cend(), x); +++ assert(Counted::current_objects == N); +++ assert(Counted::total_objects == N); +++ assert(std::all_of(buf.begin(), buf.end(), pred)); +++ +++ std::destroy(buf.begin(), buf.end()); +++ Counted::reset(); +++ } +++ +++ // Works with const iterators, (range) overload. +++ { +++ constexpr int N = 5; +++ Buffer buf; +++ +++ auto range = std::ranges::subrange(buf.cbegin(), buf.cend()); +++ std::ranges::uninitialized_fill(range, x); +++ assert(Counted::current_objects == N); +++ assert(Counted::total_objects == N); +++ assert(std::all_of(buf.begin(), buf.end(), pred)); +++ +++ std::destroy(buf.begin(), buf.end()); +++ Counted::reset(); +++ } +++ ++ return 0; ++ } ++diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill.n/ranges_uninitialized_fill_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill.n/ranges_uninitialized_fill_n.pass.cpp ++--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill.n/ranges_uninitialized_fill_n.pass.cpp +++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill.n/ranges_uninitialized_fill_n.pass.cpp ++@@ -101,5 +101,19 @@ ++ } ++ #endif // TEST_HAS_NO_EXCEPTIONS + -diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll b/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll ---- a/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll -+++ b/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll @@ -4601,11 +5089,22 @@ index de92cb4..1bea535 100644 -+; Address space intrinsics were erroneously marked NoCapture, leading to bad -+; optimizations (such as the store below being eliminated as dead code). This -+; test makes sure we don't regress. --+ +++ // Works with const iterators. +++ { +++ constexpr int N = 5; +++ Buffer buf; + + -+declare void @foo(ptr addrspace(1)) --+ +++ std::ranges::uninitialized_fill_n(buf.cbegin(), N, x); +++ assert(Counted::current_objects == N); +++ assert(Counted::total_objects == N); +++ assert(std::all_of(buf.begin(), buf.end(), pred)); + + -+declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr) --+ +++ std::destroy(buf.begin(), buf.end()); +++ Counted::reset(); +++ } + + -+; CHECK: @bar -+define void @bar() { -+ %t1 = alloca i32 @@ -4647,7 +5146,8 @@ index de92cb4..1bea535 100644 -+; CHECK: ret - %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 3) - ret i64 %val -- } ++ return 0; + } - -+; CHECK: rotateright64 - define i64 @rotateright64(i64 %a, i32 %b) { @@ -4809,7 +5309,130 @@ index de92cb4..1bea535 100644 -+; SM20-NEXT: } -+; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; - ; SM20-NEXT: ret; -- ; ++diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp ++--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp +++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp ++@@ -105,6 +105,22 @@ ++ ++ #endif // TEST_HAS_NO_EXCEPTIONS ++ +++ // Works with const iterators. +++ { +++ constexpr int N = 5; +++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; +++ Buffer out; +++ Counted::reset(); +++ +++ std::ranges::uninitialized_move_n(in, N, out.cbegin(), out.cend()); +++ assert(Counted::current_objects == N); +++ assert(Counted::total_objects == N); +++ assert(std::equal(in, in + N, out.begin(), out.end())); +++ +++ std::destroy(out.begin(), out.end()); +++ } +++ Counted::reset(); +++ ++ // Conversions. ++ { ++ constexpr int N = 3; ++diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp ++--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp +++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp ++@@ -282,6 +282,39 @@ ++ Counted::reset(); ++ #endif // TEST_HAS_NO_EXCEPTIONS ++ +++ // Works with const iterators, (iter, sentinel) overload. +++ { +++ constexpr int N = 5; +++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; +++ Buffer out; +++ Counted::reset(); +++ +++ std::ranges::uninitialized_move(in, in + N, out.cbegin(), out.cend()); +++ assert(Counted::current_objects == N); +++ assert(Counted::total_objects == N); +++ assert(std::equal(in, in + N, out.begin(), out.end())); +++ +++ std::destroy(out.begin(), out.end()); +++ } +++ Counted::reset(); +++ +++ // Works with const iterators, (range) overload. +++ { +++ constexpr int N = 5; +++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; +++ Buffer out; +++ Counted::reset(); +++ +++ std::ranges::subrange out_range (out.cbegin(), out.cend()); +++ std::ranges::uninitialized_move(in, out_range); +++ assert(Counted::current_objects == N); +++ assert(Counted::total_objects == N); +++ assert(std::equal(in, in + N, out.begin(), out.end())); +++ +++ std::destroy(out.begin(), out.end()); +++ } +++ Counted::reset(); +++ ++ // Conversions, (iter, sentinel) overload. ++ { ++ constexpr int N = 3; ++diff -ruN --strip-trailing-cr a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp ++--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp ++@@ -10287,10 +10287,8 @@ ++ SDValue LeftOp = ShiftOperand.getOperand(0); ++ SDValue RightOp = ShiftOperand.getOperand(1); ++ ++- // Treat zext nneg as sext - we might need to support handling these as zext ++- // as well in the future, but for now just prefer sext. ++- bool IsSignExt = sd_match(LeftOp, m_SExtLike(m_Value())); ++- bool IsZeroExt = sd_match(LeftOp, m_ZExt(m_Value())); +++ bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND; +++ bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND; ++ ++ if (!IsSignExt && !IsZeroExt) ++ return SDValue(); ++diff -ruN --strip-trailing-cr a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp ++--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp ++@@ -9181,12 +9181,13 @@ ++ for (unsigned Cnt : Slices) { ++ ArrayRef Slice = VL.slice(Cnt, VF); ++ // If any instruction is vectorized already - do not try again. ++- if (const TreeEntry *SE = getTreeEntry(Slice.front()); +++ if (TreeEntry *SE = getTreeEntry(Slice.front()); ++ SE || getTreeEntry(Slice.back())) { ++ if (!SE) ++ continue; ++ if (VF != SE->getVectorFactor() || !SE->isSame(Slice)) ++ continue; +++ SE->UserTreeIndices.emplace_back(&E, UINT_MAX); ++ AddCombinedNode(SE->Idx, Cnt); ++ continue; ++ } ++@@ -13396,7 +13397,12 @@ ++ if (CommonMask[Idx] != PoisonMaskElem) ++ CommonMask[Idx] = Idx; ++ for (auto [E, Idx] : SubVectors) { ++- Value *V = castToScalarTyElem(E->VectorizedValue); +++ Value *V = E->VectorizedValue; +++ if (V->getType()->isIntOrIntVectorTy()) +++ V = castToScalarTyElem(V, any_of(E->Scalars, [&](Value *V) { +++ return !isKnownNonNegative( +++ V, SimplifyQuery(*R.DL)); +++ })); ++ Vec = Builder.CreateInsertVector(Vec->getType(), Vec, V, ++ Builder.getInt64(Idx)); ++ if (!CommonMask.empty()) { ++diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/X86/pmulh.ll b/llvm/test/CodeGen/X86/pmulh.ll ++--- a/llvm/test/CodeGen/X86/pmulh.ll +++++ b/llvm/test/CodeGen/X86/pmulh.ll ++@@ -953,15 +953,39 @@ ++ ; SSE-NEXT: movdqa %xmm0, 16(%rdi) ++ ; SSE-NEXT: retq + ; - ; SM35-LABEL: rotateright64( - ; SM35: { --; SM35-NEXT: .reg .b32 %r<5>; @@ -5298,8 +5921,187 @@ index de92cb4..1bea535 100644 - ; CHECK-NEXT:.b8 1 // DW_AT_call_file - ; CHECK-NEXT:.b8 6 // DW_AT_call_line - ; CHECK-NEXT:.b8 37 // DW_AT_call_column ++-; AVX-LABEL: PR109790: ++-; AVX: # %bb.0: ++-; AVX-NEXT: movq %rdi, %rax ++-; AVX-NEXT: vmovdqa (%rsi), %ymm0 ++-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ++-; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536] ++-; AVX-NEXT: vmovdqa %ymm0, (%rdi) ++-; AVX-NEXT: vzeroupper ++-; AVX-NEXT: retq +++; AVX2-LABEL: PR109790: +++; AVX2: # %bb.0: +++; AVX2-NEXT: movq %rdi, %rax +++; AVX2-NEXT: vmovdqa (%rsi), %ymm0 +++; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +++; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536] +++; AVX2-NEXT: vmovdqa %ymm0, (%rdi) +++; AVX2-NEXT: vzeroupper +++; AVX2-NEXT: retq +++; +++; AVX512F-LABEL: PR109790: +++; AVX512F: # %bb.0: +++; AVX512F-NEXT: movq %rdi, %rax +++; AVX512F-NEXT: vmovdqa (%rsi), %ymm0 +++; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +++; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +++; AVX512F-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 +++; AVX512F-NEXT: vpsrld $16, %zmm0, %zmm0 +++; AVX512F-NEXT: vpmovdw %zmm0, (%rdi) +++; AVX512F-NEXT: vzeroupper +++; AVX512F-NEXT: retq +++; +++; AVX512BW-LABEL: PR109790: +++; AVX512BW: # %bb.0: +++; AVX512BW-NEXT: movq %rdi, %rax +++; AVX512BW-NEXT: vmovdqa (%rsi), %ymm0 +++; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +++; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +++; AVX512BW-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0] +++; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm0 +++; AVX512BW-NEXT: vpmovdw %zmm0, (%rdi) +++; AVX512BW-NEXT: vzeroupper +++; AVX512BW-NEXT: retq ++ %load = load <16 x i16>, ptr %a, align 32 ++ %and = and <16 x i16> %load, ++ %ext = zext nneg <16 x i16> %and to <16 x i32> ++diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll b/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll ++--- a/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll +++++ b/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll ++@@ -0,0 +1,97 @@ +++; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +++; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +++ +++define i1 @test(i64 %v1, ptr %v2, i32 %v3, i1 %v4) { +++; CHECK-LABEL: define i1 @test( +++; CHECK-SAME: i64 [[V1:%.*]], ptr [[V2:%.*]], i32 [[V3:%.*]], i1 [[V4:%.*]]) { +++; CHECK-NEXT: [[NEWFUNCROOT:.*:]] +++; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0 +++; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <2 x i32> zeroinitializer +++; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +++; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i8> +++; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i8> [[TMP3]], +++; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i8> [[TMP4]] to <2 x i32> +++; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i32> [[TMP5]], zeroinitializer +++; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[V3]], i32 0 +++; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> +++; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i8> [[TMP4]] to <2 x i32> +++; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP8]], <2 x i32> [[TMP9]], i64 0) +++; CHECK-NEXT: [[TMP11:%.*]] = uitofp <4 x i32> [[TMP10]] to <4 x float> +++; CHECK-NEXT: [[TMP12:%.*]] = fdiv <4 x float> zeroinitializer, [[TMP11]] +++; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i1> poison, i1 [[V4]], i32 0 +++; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i1> [[TMP13]], <4 x i1> poison, <4 x i32> +++; CHECK-NEXT: [[TMP15:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP14]], <2 x i1> [[TMP6]], i64 0) +++; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP15]], <4 x float> zeroinitializer, <4 x float> [[TMP12]] +++; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP16]], i32 3 +++; CHECK-NEXT: [[CONV_I_I1743_3:%.*]] = fptoui float [[TMP17]] to i32 +++; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i32 [[CONV_I_I1743_3]], 0 +++; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x float> [[TMP16]] to <4 x i32> +++; CHECK-NEXT: [[TMP20:%.*]] = icmp ult <4 x i32> [[TMP19]], +++; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP20]], i32 3 +++; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP21]], i1 [[TMP18]], i1 false +++; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP16]], i32 2 +++; CHECK-NEXT: [[CONV_I_I1743_2:%.*]] = fptoui float [[TMP22]] to i32 +++; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP20]], i32 2 +++; CHECK-NEXT: [[NARROW1:%.*]] = select i1 [[TMP23]], i32 [[CONV_I_I1743_2]], i32 0 +++; CHECK-NEXT: [[TMP24:%.*]] = zext i1 [[NARROW]] to i32 +++; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[NARROW1]], [[TMP24]] +++; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x float> [[TMP16]], i32 1 +++; CHECK-NEXT: [[CONV_I_I1743_1:%.*]] = fptoui float [[TMP26]] to i32 +++; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP20]], i32 1 +++; CHECK-NEXT: [[NARROW2:%.*]] = select i1 [[TMP27]], i32 [[CONV_I_I1743_1]], i32 0 +++; CHECK-NEXT: [[RV3:%.*]] = or i32 [[TMP25]], [[NARROW2]] +++; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x float> [[TMP16]], i32 0 +++; CHECK-NEXT: [[CONV_I_I1743:%.*]] = fptoui float [[TMP28]] to i32 +++; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0 +++; CHECK-NEXT: [[NARROW4:%.*]] = select i1 [[TMP29]], i32 [[CONV_I_I1743]], i32 0 +++; CHECK-NEXT: [[RT5:%.*]] = or i32 [[RV3]], [[NARROW4]] +++; CHECK-NEXT: [[RT:%.*]] = zext i32 [[RT5]] to i64 +++; CHECK-NEXT: store i64 [[RT]], ptr [[V2]], align 1 +++; CHECK-NEXT: ret i1 false +++; +++newFuncRoot: +++ %conv.i147.i1756.3 = uitofp i32 %v3 to float +++ %div.i.i.i1749.3 = fdiv float 0.000000e+00, %conv.i147.i1756.3 +++ %cond.i.i.i1751.3 = select i1 %v4, float 0.000000e+00, float %div.i.i.i1749.3 +++ %conv.i147.i1756.2 = uitofp i32 %v3 to float +++ %div.i.i.i1749.2 = fdiv float 0.000000e+00, %conv.i147.i1756.2 +++ %cond.i.i.i1751.2 = select i1 %v4, float 0.000000e+00, float %div.i.i.i1749.2 +++ %0 = lshr i64 %v1, 40 +++ %1 = trunc i64 %0 to i32 +++ %tt2 = and i32 %1, 255 +++ %cmp1.i.i.i1746.1 = icmp eq i32 %tt2, 0 +++ %conv.i147.i1756.1 = uitofp i32 %tt2 to float +++ %div.i.i.i1749.1 = fdiv float 0.000000e+00, %conv.i147.i1756.1 +++ %cond.i.i.i1751.1 = select i1 %cmp1.i.i.i1746.1, float 0.000000e+00, float %div.i.i.i1749.1 +++ %tt3 = lshr i64 %v1, 32 +++ %2 = trunc i64 %tt3 to i32 +++ %tt1 = and i32 %2, 1 +++ %cmp1.i.i.i1746 = icmp eq i32 %tt1, 0 +++ %conv.i147.i1756 = uitofp i32 %tt1 to float +++ %div.i.i.i1749 = fdiv float 0.000000e+00, %conv.i147.i1756 +++ %cond.i.i.i1751 = select i1 %cmp1.i.i.i1746, float 0.000000e+00, float %div.i.i.i1749 +++ %3 = bitcast float %cond.i.i.i1751.3 to i32 +++ %cmp.i99.i1736.3 = icmp ult i32 %3, 1333788672 +++ %conv.i.i1743.3 = fptoui float %cond.i.i.i1751.3 to i32 +++ %4 = icmp ne i32 %conv.i.i1743.3, 0 +++ %narrow = select i1 %cmp.i99.i1736.3, i1 %4, i1 false +++ %5 = bitcast float %cond.i.i.i1751.2 to i32 +++ %cmp.i99.i1736.2 = icmp ult i32 %5, 1333788672 +++ %conv.i.i1743.2 = fptoui float %cond.i.i.i1751.2 to i32 +++ %narrow1 = select i1 %cmp.i99.i1736.2, i32 %conv.i.i1743.2, i32 0 +++ %6 = zext i1 %narrow to i32 +++ %7 = or i32 %narrow1, %6 +++ %8 = bitcast float %cond.i.i.i1751.1 to i32 +++ %cmp.i99.i1736.1 = icmp ult i32 %8, 1333788672 +++ %conv.i.i1743.1 = fptoui float %cond.i.i.i1751.1 to i32 +++ %narrow2 = select i1 %cmp.i99.i1736.1, i32 %conv.i.i1743.1, i32 0 +++ %rv3 = or i32 %7, %narrow2 +++ %9 = bitcast float %cond.i.i.i1751 to i32 +++ %cmp.i99.i1736 = icmp ult i32 %9, 1333788672 +++ %conv.i.i1743 = fptoui float %cond.i.i.i1751 to i32 +++ %narrow4 = select i1 %cmp.i99.i1736, i32 %conv.i.i1743, i32 0 +++ %rt5 = or i32 %rv3, %narrow4 +++ %rt = zext i32 %rt5 to i64 +++ store i64 %rt, ptr %v2, align 1 +++ ret i1 false +++} ++diff -ruN --strip-trailing-cr a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn ++--- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn ++@@ -280,6 +280,7 @@ ++ "rdpruintrin.h", ++ "rdseedintrin.h", ++ "riscv_bitmanip.h", +++ "riscv_corev_alu.h", ++ "riscv_crypto.h", ++ "riscv_ntlh.h", ++ "rtmintrin.h", ++diff -ruN --strip-trailing-cr a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn ++--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn ++@@ -632,6 +632,7 @@ ++ "__memory/unique_temporary_buffer.h", ++ "__memory/uses_allocator.h", ++ "__memory/uses_allocator_construction.h", +++ "__memory/voidify.h", ++ "__memory_resource/memory_resource.h", ++ "__memory_resource/monotonic_buffer_resource.h", ++ "__memory_resource/polymorphic_allocator.h", ++diff -ruN --strip-trailing-cr a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel ++--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel ++@@ -358,6 +358,7 @@ ++ td_file = "include/clang/Basic/BuiltinsRISCV.td", ++ td_srcs = [ ++ "include/clang/Basic/BuiltinsRISCV.td", +++ "include/clang/Basic/BuiltinsRISCVXCV.td", ++ "include/clang/Basic/BuiltinsBase.td", ++ ], ++ ) diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl -index af35fe7..ad9923c 100644 +index af35fe7..80f07d3 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") @@ -5308,920 +6110,13 @@ index af35fe7..ad9923c 100644 """Imports LLVM.""" - LLVM_COMMIT = "9830156f623c56062bf6df1b4c4b4bd8ab5bd57c" - LLVM_SHA256 = "85bb9a61cfdaf0d3386890dc7b4bbaa17eecf4b70b60c314307f2ca3919b9035" -+ LLVM_COMMIT = "6292f117c39b9fc72da4e40328eeeda2aa94a5f2" -+ LLVM_SHA256 = "31f583de2e077f9289fc5efea74bd6e1a1694fda5f77f09472253cdc072f2e5e" ++ LLVM_COMMIT = "00128a20eec27246719d73ba427bf821883b00b4" ++ LLVM_SHA256 = "9fff2ccb6c262f3d5e2f98c281a0b99a585daee83742e1599709ff61cfc222af" tf_http_archive( name = name, -diff --git a/third_party/stablehlo/temporary.patch b/third_party/stablehlo/temporary.patch -index 8b13789..7102b01 100755 ---- a/third_party/stablehlo/temporary.patch -+++ b/third_party/stablehlo/temporary.patch -@@ -1 +1,902 @@ -+diff --ruN a/stablehlo/examples/c++/ExampleAdd.cpp b/stablehlo/examples/c++/ExampleAdd.cpp -+--- stablehlo/examples/c++/ExampleAdd.cpp -++++ stablehlo/examples/c++/ExampleAdd.cpp -+@@ -18,7 +18,7 @@ -+ #include "llvm/ADT/SmallVector.h" -+ #include "llvm/Support/LogicalResult.h" -+ #include "mlir/Dialect/Func/IR/FuncOps.h" -+-#include "mlir/Dialect/Quant/QuantOps.h" -++#include "mlir/Dialect/Quant/IR/Quant.h" -+ #include "mlir/IR/Attributes.h" -+ #include "mlir/IR/Block.h" -+ #include "mlir/IR/Builders.h" -+@@ -43,7 +43,7 @@ -+ mlir::ModuleOp::create(mlir::UnknownLoc::get(&context)); -+ module->getContext()->loadDialect(); -+ module->getContext()->loadDialect(); -+- module->getContext()->loadDialect(); -++ module->getContext()->loadDialect(); -+ module->setName("test_module"); -+ -+ /** create function **/ -+diff --ruN a/stablehlo/stablehlo/conversions/tosa/transforms/StablehloQuantLegalizeToTosaRescale.cpp b/stablehlo/stablehlo/conversions/tosa/transforms/StablehloQuantLegalizeToTosaRescale.cpp -+--- stablehlo/stablehlo/conversions/tosa/transforms/StablehloQuantLegalizeToTosaRescale.cpp -++++ stablehlo/stablehlo/conversions/tosa/transforms/StablehloQuantLegalizeToTosaRescale.cpp -+@@ -17,7 +17,7 @@ -+ #include -+ -+ #include "mlir/Dialect/Func/IR/FuncOps.h" -+-#include "mlir/Dialect/Quant/QuantOps.h" -++#include "mlir/Dialect/Quant/IR/Quant.h" -+ #include "mlir/Dialect/Tosa/IR/TosaOps.h" -+ #include "mlir/Dialect/Tosa/Utils/ConversionUtils.h" -+ #include "mlir/Dialect/Tosa/Utils/QuantUtils.h" -+diff --ruN a/stablehlo/stablehlo/conversions/tosa/transforms/TosaRescaleLegalizeToStablehlo.cpp b/stablehlo/stablehlo/conversions/tosa/transforms/TosaRescaleLegalizeToStablehlo.cpp -+--- stablehlo/stablehlo/conversions/tosa/transforms/TosaRescaleLegalizeToStablehlo.cpp -++++ stablehlo/stablehlo/conversions/tosa/transforms/TosaRescaleLegalizeToStablehlo.cpp -+@@ -18,7 +18,7 @@ -+ #include -+ -+ #include "mlir/Dialect/Func/IR/FuncOps.h" -+-#include "mlir/Dialect/Quant/QuantOps.h" -++#include "mlir/Dialect/Quant/IR/Quant.h" -+ #include "mlir/Dialect/Tosa/IR/TosaOps.h" -+ #include "mlir/Dialect/Tosa/Utils/ConversionUtils.h" -+ #include "mlir/Dialect/Tosa/Utils/QuantUtils.h" -+diff --ruN a/stablehlo/stablehlo/dialect/Base.cpp b/stablehlo/stablehlo/dialect/Base.cpp -+--- stablehlo/stablehlo/dialect/Base.cpp -++++ stablehlo/stablehlo/dialect/Base.cpp -+@@ -31,7 +31,7 @@ -+ #include "llvm/ADT/SmallVector.h" -+ #include "llvm/Support/Debug.h" -+ #include "llvm/Support/ErrorHandling.h" -+-#include "mlir/Dialect/Quant/QuantTypes.h" -++#include "mlir/Dialect/Quant/IR/QuantTypes.h" -+ #include "mlir/Dialect/Shape/IR/Shape.h" -+ #include "mlir/IR/Builders.h" -+ #include "mlir/IR/BuiltinAttributes.h" -+diff --ruN a/stablehlo/stablehlo/dialect/ChloOps.h b/stablehlo/stablehlo/dialect/ChloOps.h -+--- stablehlo/stablehlo/dialect/ChloOps.h -++++ stablehlo/stablehlo/dialect/ChloOps.h -+@@ -20,7 +20,7 @@ -+ #include "llvm/ADT/APFloat.h" -+ #include "llvm/ADT/StringRef.h" -+ #include "mlir/Bytecode/BytecodeOpInterface.h" -+-#include "mlir/Dialect/Quant/QuantTypes.h" -++#include "mlir/Dialect/Quant/IR/QuantTypes.h" -+ #include "mlir/IR/Attributes.h" -+ #include "mlir/IR/Builders.h" -+ #include "mlir/IR/BuiltinTypes.h" -+diff --ruN a/stablehlo/stablehlo/dialect/Register.cpp b/stablehlo/stablehlo/dialect/Register.cpp -+--- stablehlo/stablehlo/dialect/Register.cpp -++++ stablehlo/stablehlo/dialect/Register.cpp -+@@ -17,7 +17,7 @@ -+ #include "stablehlo/dialect/Register.h" -+ -+ #include "mlir/Dialect/Func/IR/FuncOps.h" -+-#include "mlir/Dialect/Quant/QuantOps.h" -++#include "mlir/Dialect/Quant/IR/Quant.h" -+ #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" -+ #include "mlir/IR/DialectRegistry.h" -+ #include "stablehlo/dialect/ChloOps.h" -+@@ -30,7 +30,7 @@ -+ void registerAllDialects(mlir::DialectRegistry ®istry) { -+ // clang-format off -+ registry.insert(); -+ registry.insert -+ -+ #include "llvm/ADT/StringRef.h" -+-#include "mlir/Dialect/Quant/QuantTypes.h" -++#include "mlir/Dialect/Quant/IR/QuantTypes.h" -+ #include "mlir/Dialect/Shape/IR/Shape.h" -+ #include "mlir/IR/Attributes.h" -+ #include "mlir/IR/Builders.h" -+diff --ruN a/stablehlo/stablehlo/dialect/TypeInference.cpp b/stablehlo/stablehlo/dialect/TypeInference.cpp -+--- stablehlo/stablehlo/dialect/TypeInference.cpp -++++ stablehlo/stablehlo/dialect/TypeInference.cpp -+@@ -52,7 +52,7 @@ -+ #include "llvm/Support/Regex.h" -+ #include "llvm/Support/raw_ostream.h" -+ #include "mlir/Dialect/Func/IR/FuncOps.h" -+-#include "mlir/Dialect/Quant/QuantTypes.h" -++#include "mlir/Dialect/Quant/IR/QuantTypes.h" -+ #include "mlir/IR/Attributes.h" -+ #include "mlir/IR/Builders.h" -+ #include "mlir/IR/BuiltinAttributes.h" -+diff --ruN a/stablehlo/stablehlo/dialect/VhloTypes.cpp b/stablehlo/stablehlo/dialect/VhloTypes.cpp -+--- stablehlo/stablehlo/dialect/VhloTypes.cpp -++++ stablehlo/stablehlo/dialect/VhloTypes.cpp -+@@ -20,7 +20,7 @@ -+ #include "llvm/ADT/SmallVectorExtras.h" -+ #include "llvm/ADT/StringRef.h" -+ #include "llvm/ADT/TypeSwitch.h" -+-#include "mlir/Dialect/Quant/QuantTypes.h" -++#include "mlir/Dialect/Quant/IR/QuantTypes.h" -+ #include "mlir/Dialect/Shape/IR/Shape.h" -+ #include "mlir/IR/Attributes.h" -+ #include "mlir/IR/BuiltinTypes.h" -+diff --ruN a/stablehlo/stablehlo/reference/Api.cpp b/stablehlo/stablehlo/reference/Api.cpp -+--- stablehlo/stablehlo/reference/Api.cpp -++++ stablehlo/stablehlo/reference/Api.cpp -+@@ -31,7 +31,7 @@ -+ #include "llvm/Support/Path.h" -+ #include "llvm/Support/SourceMgr.h" -+ #include "mlir/Dialect/Func/IR/FuncOps.h" -+-#include "mlir/Dialect/Quant/QuantTypes.h" -++#include "mlir/Dialect/Quant/IR/QuantTypes.h" -+ #include "mlir/IR/BuiltinAttributes.h" -+ #include "mlir/IR/BuiltinOps.h" -+ #include "mlir/IR/BuiltinTypeInterfaces.h" -+diff --ruN a/stablehlo/stablehlo/tests/CheckOps.h b/stablehlo/stablehlo/tests/CheckOps.h -+--- stablehlo/stablehlo/tests/CheckOps.h -++++ stablehlo/stablehlo/tests/CheckOps.h -+@@ -17,7 +17,7 @@ -+ #define STABLEHLO_DIALECT_CHECKOPS_H_ -+ -+ #include "mlir/Bytecode/BytecodeOpInterface.h" -+-#include "mlir/Dialect/Quant/QuantTypes.h" -++#include "mlir/Dialect/Quant/IR/QuantTypes.h" -+ #include "mlir/IR/BuiltinAttributes.h" -+ #include "mlir/IR/BuiltinTypes.h" -+ #include "mlir/IR/Dialect.h" -+diff --ruN a/stablehlo/stablehlo/tests/ops_stablehlo_quantized.mlir b/stablehlo/stablehlo/tests/ops_stablehlo_quantized.mlir -+--- stablehlo/stablehlo/tests/ops_stablehlo_quantized.mlir -++++ stablehlo/stablehlo/tests/ops_stablehlo_quantized.mlir -+@@ -1338,24 +1338,24 @@ -+ -+ // ----- -+ -++// expected-error@+1 {{scale out of expressed type range}} -+ func.func @quantized_element_type_c6(%arg0: tensor<1x2x!quant.uniform>) { -+- // expected-error-re@+1 {{operand #0 must be ranked tensor of {{.*}} 2/4/8/16/32-bit uniform quantized signed integer or 2/4/8/16/32-bit uniform quantized unsigned integer or 2/4/8/16/32-bit uniform quantized per axis signed integer or 2/4/8/16/32-bit uniform quantized per axis unsigned integer values, but got 'tensor<1x2x!quant.uniform>'}} -+ %0 = stablehlo.add %arg0, %arg0 : tensor<1x2x!quant.uniform> -+ func.return -+ } -+ -+ // ----- -+ -++// expected-error@+1 {{scale out of expressed type range}} -+ func.func @quantized_element_type_c6(%arg0: tensor<1x2x!quant.uniform>) { -+- // expected-error-re@+1 {{operand #0 must be ranked tensor of {{.*}} 2/4/8/16/32-bit uniform quantized signed integer or 2/4/8/16/32-bit uniform quantized unsigned integer or 2/4/8/16/32-bit uniform quantized per axis signed integer or 2/4/8/16/32-bit uniform quantized per axis unsigned integer values, but got 'tensor<1x2x!quant.uniform>'}} -+ %0 = stablehlo.add %arg0, %arg0 : tensor<1x2x!quant.uniform> -+ func.return -+ } -+ -+ // ----- -+ -++// expected-error@+1 {{illegal quantized dimension: -1}} -+ func.func @quantized_element_type_c11(%arg0: tensor<1x5x2x!quant.uniform:f32:-1, {0.1:-30, 0.1:-30}>>) { -+- // expected-error-re@+1 {{operand #0 must be ranked tensor of {{.*}} 2/4/8/16/32-bit uniform quantized signed integer or 2/4/8/16/32-bit uniform quantized unsigned integer or 2/4/8/16/32-bit uniform quantized per axis signed integer or 2/4/8/16/32-bit uniform quantized per axis unsigned integer values, but got 'tensor<1x5x2x!quant.uniform>'}} -+ %0 = stablehlo.add %arg0, %arg0 : tensor<1x5x2x!quant.uniform:f32:-1, {0.1:-30, 0.1:-30}>> -+ func.return -+ } -+diff --ruN a/stablehlo/stablehlo/tests/transforms/stablehlo_create_compatibility_expander.mlir b/stablehlo/stablehlo/tests/transforms/stablehlo_create_compatibility_expander.mlir -+--- stablehlo/stablehlo/tests/transforms/stablehlo_create_compatibility_expander.mlir -++++ stablehlo/stablehlo/tests/transforms/stablehlo_create_compatibility_expander.mlir -+@@ -69,7 +69,7 @@ -+ index_vector_dim = 3 -+ >, -+ slice_sizes = array, -+- indices_are_sorted = true -++ indices_are_sorted = false -+ } : (tensor<3x2x4x7x9xi32>, tensor<4x3x5x2xi32>) -> tensor<4x3x5x8xi32> -+ func.return %0 : tensor<4x3x5x8xi32> -+ } -+@@ -77,9 +77,9 @@ -+ // ----- -+ -+ // CHECK-LABEL: @gather_with_batching_no_index_vector_dim -++// CHECK-NEXT: %[[reshape:.*]] = stablehlo.reshape %arg1 : (tensor<4x3x5xi32>) -> tensor<4x3x5x1xi32> -+ // CHECK-NEXT: %[[iota_dim1:.*]] = stablehlo.iota dim = 1 : tensor<4x3x5x1xi32> -+ // CHECK-NEXT: %[[iota_dim0:.*]] = stablehlo.iota dim = 0 : tensor<4x3x5x1xi32> -+-// CHECK-NEXT: %[[reshape:.*]] = stablehlo.reshape %arg1 : (tensor<4x3x5xi32>) -> tensor<4x3x5x1xi32> -+ // CHECK-NEXT: %[[concat:.*]] = stablehlo.concatenate %[[iota_dim1]], %[[iota_dim0]], %[[reshape]], dim = 3 : (tensor<4x3x5x1xi32>, tensor<4x3x5x1xi32>, tensor<4x3x5x1xi32>) -> tensor<4x3x5x3xi32> -+ // CHECK-NEXT: %[[gather:.*]] = "stablehlo.gather"(%arg0, %[[concat]]) <{ -+ // CHECK-SAME: dimension_numbers = #stablehlo.gather< -+@@ -102,7 +102,7 @@ -+ index_vector_dim = 3 -+ >, -+ slice_sizes = array, -+- indices_are_sorted = true -++ indices_are_sorted = false -+ }> : (tensor<3x2x4x9xi32>, tensor<4x3x5xi32>) -> tensor<4x3x5x8xi32> -+ func.return %0 : tensor<4x3x5x8xi32> -+ } -+@@ -133,9 +133,305 @@ -+ index_vector_dim = 3 -+ >, -+ slice_sizes = array, -+- indices_are_sorted = true -++ indices_are_sorted = false -+ }> : (tensor<0x2x9xi32>, tensor<0x3x5x1xi32>) -> tensor<0x3x5x8xi32> -+ func.return %0 : tensor<0x3x5x8xi32> -++} -++ -++// ----- -++ -++// CHECK-LABEL: @gather_batching_dims_indices_become_unsorted -++// CHECK-NEXT: %[[iota_dim1:.*]] = stablehlo.iota dim = 0 : tensor<3x4x5x1xi32> -++// CHECK-NEXT: %[[iota_dim0:.*]] = stablehlo.iota dim = 1 : tensor<3x4x5x1xi32> -++// CHECK-NEXT: %[[concat:.*]] = stablehlo.concatenate %[[iota_dim1]], %[[iota_dim0]], %arg1, dim = 3 : (tensor<3x4x5x1xi32>, tensor<3x4x5x1xi32>, tensor<3x4x5x2xi32>) -> tensor<3x4x5x4xi32> -++// CHECK-NEXT: %[[gather:.*]] = "stablehlo.gather"(%arg0, %[[concat]]) <{ -++// CHECK-SAME: dimension_numbers = #stablehlo.gather< -++// CHECK-SAME: offset_dims = [3], collapsed_slice_dims = [0, 1, 2, 3], -++// CHECK-SAME: start_index_map = [0, 2, 1, 3], index_vector_dim = 3>, -++// CHECK-SAME: indices_are_sorted = false, -++// CHECK-SAME: slice_sizes = array -++// CHECK-SAME: }> : (tensor<3x2x4x7x9xi32>, tensor<3x4x5x4xi32>) -> tensor<3x4x5x8xi32> -++// CHECK-NEXT: return %[[gather]] : tensor<3x4x5x8xi32> -++func.func @gather_batching_dims_indices_become_unsorted(%arg0: tensor<3x2x4x7x9xi32>, %arg1: tensor<3x4x5x2xi32>) -> tensor<3x4x5x8xi32> { -++ %0 = "stablehlo.gather"(%arg0, %arg1) { -++ dimension_numbers = #stablehlo.gather< -++ offset_dims = [3], -++ collapsed_slice_dims = [1, 3], -++ operand_batching_dims = [0, 2], -++ start_indices_batching_dims = [0, 1], -++ start_index_map = [1, 3], -++ index_vector_dim = 3 -++ >, -++ slice_sizes = array, -++ indices_are_sorted = true -++ } : (tensor<3x2x4x7x9xi32>, tensor<3x4x5x2xi32>) -> tensor<3x4x5x8xi32> -++ func.return %0 : tensor<3x4x5x8xi32> -++} -++ -++// ----- -++ -++// CHECK-LABEL: @gather_batching_dims_indices_become_unsorted_2 -++// CHECK-NEXT: %[[iota_dim1:.*]] = stablehlo.iota dim = 1 : tensor<2x3x5x1xi32> -++// CHECK-NEXT: %[[iota_dim0:.*]] = stablehlo.iota dim = 0 : tensor<2x3x5x1xi32> -++// CHECK-NEXT: %[[concat:.*]] = stablehlo.concatenate %[[iota_dim1]], %[[iota_dim0]], %arg1, dim = 3 : (tensor<2x3x5x1xi32>, tensor<2x3x5x1xi32>, tensor<2x3x5x2xi32>) -> tensor<2x3x5x4xi32> -++// CHECK-NEXT: %[[gather:.*]] = "stablehlo.gather"(%arg0, %[[concat]]) <{ -++// CHECK-SAME: dimension_numbers = #stablehlo.gather< -++// CHECK-SAME: offset_dims = [3], collapsed_slice_dims = [0, 1, 2, 3], -++// CHECK-SAME: start_index_map = [0, 1, 2, 3], index_vector_dim = 3>, -++// CHECK-SAME: indices_are_sorted = false, -++// CHECK-SAME: slice_sizes = array -++// CHECK-SAME: }> : (tensor<3x2x4x7x9xi32>, tensor<2x3x5x4xi32>) -> tensor<2x3x5x8xi32> -++// CHECK-NEXT: return %[[gather]] : tensor<2x3x5x8xi32> -++func.func @gather_batching_dims_indices_become_unsorted_2(%arg0: tensor<3x2x4x7x9xi32>, %arg1: tensor<2x3x5x2xi32>) -> tensor<2x3x5x8xi32> { -++ %0 = "stablehlo.gather"(%arg0, %arg1) { -++ dimension_numbers = #stablehlo.gather< -++ offset_dims = [3], -++ collapsed_slice_dims = [2, 3], -++ operand_batching_dims = [0, 1], -++ start_indices_batching_dims = [1, 0], -++ start_index_map = [2, 3], -++ index_vector_dim = 3 -++ >, -++ slice_sizes = array, -++ indices_are_sorted = true -++ } : (tensor<3x2x4x7x9xi32>, tensor<2x3x5x2xi32>) -> tensor<2x3x5x8xi32> -++ func.return %0 : tensor<2x3x5x8xi32> -++} -++ -++// ----- -++ -++// CHECK-LABEL: @gather_batching_dims_indices_remain_sorted -++// CHECK-NEXT: %[[iota_dim1:.*]] = stablehlo.iota dim = 0 : tensor<2x3x5x1xi32> -++// CHECK-NEXT: %[[iota_dim0:.*]] = stablehlo.iota dim = 2 : tensor<2x3x5x1xi32> -++// CHECK-NEXT: %[[concat:.*]] = stablehlo.concatenate %[[iota_dim1]], %[[iota_dim0]], %arg1, dim = 3 : (tensor<2x3x5x1xi32>, tensor<2x3x5x1xi32>, tensor<2x3x5x2xi32>) -> tensor<2x3x5x4xi32> -++// CHECK-NEXT: %[[gather:.*]] = "stablehlo.gather"(%arg0, %[[concat]]) <{ -++// CHECK-SAME: dimension_numbers = #stablehlo.gather< -++// CHECK-SAME: offset_dims = [3], collapsed_slice_dims = [0, 1, 2, 3], -++// CHECK-SAME: start_index_map = [0, 1, 2, 3], index_vector_dim = 3>, -++// CHECK-SAME: indices_are_sorted = true, -++// CHECK-SAME: slice_sizes = array -++// CHECK-SAME: }> : (tensor<2x5x4x7x9xi32>, tensor<2x3x5x4xi32>) -> tensor<2x3x5x8xi32> -++// CHECK-NEXT: return %[[gather]] : tensor<2x3x5x8xi32> -++func.func @gather_batching_dims_indices_remain_sorted(%arg0: tensor<2x5x4x7x9xi32>, %arg1: tensor<2x3x5x2xi32>) -> tensor<2x3x5x8xi32> { -++ %0 = "stablehlo.gather"(%arg0, %arg1) { -++ dimension_numbers = #stablehlo.gather< -++ offset_dims = [3], -++ collapsed_slice_dims = [2, 3], -++ operand_batching_dims = [0, 1], -++ start_indices_batching_dims = [0, 2], -++ start_index_map = [2, 3], -++ index_vector_dim = 3 -++ >, -++ slice_sizes = array, -++ indices_are_sorted = true -++ } : (tensor<2x5x4x7x9xi32>, tensor<2x3x5x2xi32>) -> tensor<2x3x5x8xi32> -++ func.return %0 : tensor<2x3x5x8xi32> -++} -++ -++// ----- -++ -++// CHECK-LABEL: @gather_batching_dims_indices_remain_unsorted -++// CHECK-NEXT: %[[iota_dim1:.*]] = stablehlo.iota dim = 0 : tensor<2x3x5x1xi32> -++// CHECK-NEXT: %[[iota_dim0:.*]] = stablehlo.iota dim = 2 : tensor<2x3x5x1xi32> -++// CHECK-NEXT: %[[concat:.*]] = stablehlo.concatenate %[[iota_dim1]], %[[iota_dim0]], %arg1, dim = 3 : (tensor<2x3x5x1xi32>, tensor<2x3x5x1xi32>, tensor<2x3x5x2xi32>) -> tensor<2x3x5x4xi32> -++// CHECK-NEXT: %[[gather:.*]] = "stablehlo.gather"(%arg0, %[[concat]]) <{ -++// CHECK-SAME: dimension_numbers = #stablehlo.gather< -++// CHECK-SAME: offset_dims = [3], collapsed_slice_dims = [0, 1, 2, 3], -++// CHECK-SAME: start_index_map = [0, 1, 2, 3], index_vector_dim = 3>, -++// CHECK-SAME: indices_are_sorted = false, -++// CHECK-SAME: slice_sizes = array -++// CHECK-SAME: }> : (tensor<2x5x4x7x9xi32>, tensor<2x3x5x4xi32>) -> tensor<2x3x5x8xi32> -++// CHECK-NEXT: return %[[gather]] : tensor<2x3x5x8xi32> -++func.func @gather_batching_dims_indices_remain_unsorted(%arg0: tensor<2x5x4x7x9xi32>, %arg1: tensor<2x3x5x2xi32>) -> tensor<2x3x5x8xi32> { -++ %0 = "stablehlo.gather"(%arg0, %arg1) { -++ dimension_numbers = #stablehlo.gather< -++ offset_dims = [3], -++ collapsed_slice_dims = [2, 3], -++ operand_batching_dims = [0, 1], -++ start_indices_batching_dims = [0, 2], -++ start_index_map = [2, 3], -++ index_vector_dim = 3 -++ >, -++ slice_sizes = array, -++ indices_are_sorted = false -++ } : (tensor<2x5x4x7x9xi32>, tensor<2x3x5x2xi32>) -> tensor<2x3x5x8xi32> -++ func.return %0 : tensor<2x3x5x8xi32> -++} -++ -++// ----- -++ -++// CHECK-LABEL: @gather_batching_dims_does_not_overflow_indices_type -++// CHECK-NEXT: %[[iota_dim1:.*]] = stablehlo.iota dim = 1 : tensor<4x127x5x1xi8> -++// CHECK-NEXT: %[[iota_dim0:.*]] = stablehlo.iota dim = 0 : tensor<4x127x5x1xi8> -++// CHECK-NEXT: %[[concat:.*]] = stablehlo.concatenate %[[iota_dim1]], %[[iota_dim0]], %arg1, dim = 3 : (tensor<4x127x5x1xi8>, tensor<4x127x5x1xi8>, tensor<4x127x5x2xi8>) -> tensor<4x127x5x4xi8> -++// CHECK-NEXT: %[[gather:.*]] = "stablehlo.gather"(%arg0, %[[concat]]) <{ -++// CHECK-SAME: dimension_numbers = #stablehlo.gather< -++// CHECK-SAME: offset_dims = [3], collapsed_slice_dims = [0, 1, 2, 3], -++// CHECK-SAME: start_index_map = [0, 2, 1, 3], index_vector_dim = 3>, -++// CHECK-SAME: indices_are_sorted = false, -++// CHECK-SAME: slice_sizes = array -++// CHECK-SAME: }> : (tensor<127x2x4x7x9xi32>, tensor<4x127x5x4xi8>) -> tensor<4x127x5x8xi32> -++// CHECK-NEXT: return %[[gather]] : tensor<4x127x5x8xi32> -++func.func @gather_batching_dims_does_not_overflow_indices_type(%arg0: tensor<127x2x4x7x9xi32>, %arg1: tensor<4x127x5x2xi8>) -> tensor<4x127x5x8xi32> { -++ %0 = "stablehlo.gather"(%arg0, %arg1) { -++ dimension_numbers = #stablehlo.gather< -++ offset_dims = [3], -++ collapsed_slice_dims = [1, 3], -++ operand_batching_dims = [0, 2], -++ start_indices_batching_dims = [1, 0], -++ start_index_map = [1, 3], -++ index_vector_dim = 3 -++ >, -++ slice_sizes = array, -++ indices_are_sorted = false -++ } : (tensor<127x2x4x7x9xi32>, tensor<4x127x5x2xi8>) -> tensor<4x127x5x8xi32> -++ func.return %0 : tensor<4x127x5x8xi32> -++} -++ -++// ----- -++ -++// CHECK-LABEL: @gather_batching_dim_overflows_signless_indices_type -++// CHECK-NEXT: %[[convert:.*]] = stablehlo.convert %arg1 : (tensor<4x128x5x2xi8>) -> tensor<4x128x5x2xi32> -++// CHECK-NEXT: %[[iota_dim1:.*]] = stablehlo.iota dim = 1 : tensor<4x128x5x1xi32> -++// CHECK-NEXT: %[[iota_dim0:.*]] = stablehlo.iota dim = 0 : tensor<4x128x5x1xi32> -++// CHECK-NEXT: %[[concat:.*]] = stablehlo.concatenate %[[iota_dim1]], %[[iota_dim0]], %[[convert]], dim = 3 : (tensor<4x128x5x1xi32>, tensor<4x128x5x1xi32>, tensor<4x128x5x2xi32>) -> tensor<4x128x5x4xi32> -++// CHECK-NEXT: %[[gather:.*]] = "stablehlo.gather"(%arg0, %[[concat]]) <{ -++// CHECK-SAME: dimension_numbers = #stablehlo.gather< -++// CHECK-SAME: offset_dims = [3], collapsed_slice_dims = [0, 1, 2, 3], -++// CHECK-SAME: start_index_map = [0, 2, 1, 3], index_vector_dim = 3>, -++// CHECK-SAME: indices_are_sorted = false, -++// CHECK-SAME: slice_sizes = array -++// CHECK-SAME: }> : (tensor<128x2x4x7x9xi32>, tensor<4x128x5x4xi32>) -> tensor<4x128x5x8xi32> -++// CHECK-NEXT: return %[[gather]] : tensor<4x128x5x8xi32> -++func.func @gather_batching_dim_overflows_signless_indices_type(%arg0: tensor<128x2x4x7x9xi32>, %arg1: tensor<4x128x5x2xi8>) -> tensor<4x128x5x8xi32> { -++ %0 = "stablehlo.gather"(%arg0, %arg1) { -++ dimension_numbers = #stablehlo.gather< -++ offset_dims = [3], -++ collapsed_slice_dims = [1, 3], -++ operand_batching_dims = [0, 2], -++ start_indices_batching_dims = [1, 0], -++ start_index_map = [1, 3], -++ index_vector_dim = 3 -++ >, -++ slice_sizes = array, -++ indices_are_sorted = false -++ } : (tensor<128x2x4x7x9xi32>, tensor<4x128x5x2xi8>) -> tensor<4x128x5x8xi32> -++ func.return %0 : tensor<4x128x5x8xi32> -++} -++ -++// ----- -++ -++// CHECK-LABEL: @gather_batching_dim_overflows_unsigned_indices_type -++// CHECK-NEXT: %[[convert:.*]] = stablehlo.convert %arg1 : (tensor<256x4x5x2xui8>) -> tensor<256x4x5x2xi32> -++// CHECK-NEXT: %[[iota_dim0:.*]] = stablehlo.iota dim = 0 : tensor<256x4x5x1xi32> -++// CHECK-NEXT: %[[iota_dim1:.*]] = stablehlo.iota dim = 1 : tensor<256x4x5x1xi32> -++// CHECK-NEXT: %[[concat:.*]] = stablehlo.concatenate %[[iota_dim0]], %[[iota_dim1]], %[[convert]], dim = 3 : (tensor<256x4x5x1xi32>, tensor<256x4x5x1xi32>, tensor<256x4x5x2xi32>) -> tensor<256x4x5x4xi32> -++// CHECK-NEXT: %[[gather:.*]] = "stablehlo.gather"(%arg0, %[[concat]]) <{ -++// CHECK-SAME: dimension_numbers = #stablehlo.gather< -++// CHECK-SAME: offset_dims = [3], collapsed_slice_dims = [0, 1, 2, 3], -++// CHECK-SAME: start_index_map = [0, 2, 1, 3], index_vector_dim = 3>, -++// CHECK-SAME: indices_are_sorted = false, -++// CHECK-SAME: slice_sizes = array -++// CHECK-SAME: }> : (tensor<256x2x4x7x9xi32>, tensor<256x4x5x4xi32>) -> tensor<256x4x5x8xi32> -++// CHECK-NEXT: return %[[gather]] : tensor<256x4x5x8xi32> -++func.func @gather_batching_dim_overflows_unsigned_indices_type(%arg0: tensor<256x2x4x7x9xi32>, %arg1: tensor<256x4x5x2xui8>) -> tensor<256x4x5x8xi32> { -++ %0 = "stablehlo.gather"(%arg0, %arg1) { -++ dimension_numbers = #stablehlo.gather< -++ offset_dims = [3], -++ collapsed_slice_dims = [1, 3], -++ operand_batching_dims = [0, 2], -++ start_indices_batching_dims = [0, 1], -++ start_index_map = [1, 3], -++ index_vector_dim = 3 -++ >, -++ slice_sizes = array, -++ indices_are_sorted = false -++ } : (tensor<256x2x4x7x9xi32>, tensor<256x4x5x2xui8>) -> tensor<256x4x5x8xi32> -++ func.return %0 : tensor<256x4x5x8xi32> -++} -++ -++// ----- -++ -++// CHECK-LABEL: @gather_batching_dim_overflows_indices_type_and_i32 -++// CHECK-NEXT: %[[convert:.*]] = stablehlo.convert %arg1 : (tensor<4x2147483648x5x2xi8>) -> tensor<4x2147483648x5x2xi64> -++// CHECK-NEXT: %[[iota_dim1:.*]] = stablehlo.iota dim = 1 : tensor<4x2147483648x5x1xi64> -++// CHECK-NEXT: %[[iota_dim0:.*]] = stablehlo.iota dim = 0 : tensor<4x2147483648x5x1xi64> -++// CHECK-NEXT: %[[concat:.*]] = stablehlo.concatenate %[[iota_dim1]], %[[iota_dim0]], %[[convert]], dim = 3 : (tensor<4x2147483648x5x1xi64>, tensor<4x2147483648x5x1xi64>, tensor<4x2147483648x5x2xi64>) -> tensor<4x2147483648x5x4xi64> -++// CHECK-NEXT: %[[gather:.*]] = "stablehlo.gather"(%arg0, %[[concat]]) <{ -++// CHECK-SAME: dimension_numbers = #stablehlo.gather< -++// CHECK-SAME: offset_dims = [3], collapsed_slice_dims = [0, 1, 2, 3], -++// CHECK-SAME: start_index_map = [0, 2, 1, 3], index_vector_dim = 3>, -++// CHECK-SAME: indices_are_sorted = false, -++// CHECK-SAME: slice_sizes = array -++// CHECK-SAME: }> : (tensor<2147483648x2x4x7x9xi32>, tensor<4x2147483648x5x4xi64>) -> tensor<4x2147483648x5x8xi32> -++// CHECK-NEXT: return %[[gather]] : tensor<4x2147483648x5x8xi32> -++func.func @gather_batching_dim_overflows_indices_type_and_i32(%arg0: tensor<2147483648x2x4x7x9xi32>, %arg1: tensor<4x2147483648x5x2xi8>) -> tensor<4x2147483648x5x8xi32> { -++ %0 = "stablehlo.gather"(%arg0, %arg1) { -++ dimension_numbers = #stablehlo.gather< -++ offset_dims = [3], -++ collapsed_slice_dims = [1, 3], -++ operand_batching_dims = [0, 2], -++ start_indices_batching_dims = [1, 0], -++ start_index_map = [1, 3], -++ index_vector_dim = 3 -++ >, -++ slice_sizes = array, -++ indices_are_sorted = false -++ } : (tensor<2147483648x2x4x7x9xi32>, tensor<4x2147483648x5x2xi8>) -> tensor<4x2147483648x5x8xi32> -++ func.return %0 : tensor<4x2147483648x5x8xi32> -++} -++ -++// ----- -++ -++// CHECK-LABEL: @gather_batching_dim_dynamic_size -++// CHECK: operand_batching_dims = [0, 2] -++// CHECK: start_indices_batching_dims = [1, 0] -++func.func @gather_batching_dim_dynamic_size(%arg0: tensor, %arg1: tensor<4x?x5x2xi8>) -> tensor<4x?x5x8xi32> { -++ %0 = "stablehlo.gather"(%arg0, %arg1) { -++ dimension_numbers = #stablehlo.gather< -++ offset_dims = [3], -++ collapsed_slice_dims = [1, 3], -++ operand_batching_dims = [0, 2], -++ start_indices_batching_dims = [1, 0], -++ start_index_map = [1, 3], -++ index_vector_dim = 3 -++ >, -++ slice_sizes = array, -++ indices_are_sorted = false -++ } : (tensor, tensor<4x?x5x2xi8>) -> tensor<4x?x5x8xi32> -++ func.return %0 : tensor<4x?x5x8xi32> -++} -++ -++// ----- -++ -++// CHECK-LABEL: @gather_batching_dim_overflows_and_no_index_vector_dim -++// CHECK-NEXT: %[[convert:.*]] = stablehlo.convert %arg1 : (tensor<4x128x5xi8>) -> tensor<4x128x5xi32> -++// CHECK-NEXT: %[[reshape:.*]] = stablehlo.reshape %[[convert]] : (tensor<4x128x5xi32>) -> tensor<4x128x5x1xi32> -++// CHECK-NEXT: %[[iota_dim1:.*]] = stablehlo.iota dim = 1 : tensor<4x128x5x1xi32> -++// CHECK-NEXT: %[[iota_dim0:.*]] = stablehlo.iota dim = 0 : tensor<4x128x5x1xi32> -++// CHECK-NEXT: %[[concat:.*]] = stablehlo.concatenate %[[iota_dim1]], %[[iota_dim0]], %[[reshape]], dim = 3 : (tensor<4x128x5x1xi32>, tensor<4x128x5x1xi32>, tensor<4x128x5x1xi32>) -> tensor<4x128x5x3xi32> -++// CHECK-NEXT: %[[gather:.*]] = "stablehlo.gather"(%arg0, %[[concat]]) <{ -++// CHECK-SAME: dimension_numbers = #stablehlo.gather< -++// CHECK-SAME: offset_dims = [3], collapsed_slice_dims = [0, 1, 2], -++// CHECK-SAME: start_index_map = [0, 2, 1], index_vector_dim = 3>, -++// CHECK-SAME: indices_are_sorted = false, -++// CHECK-SAME: slice_sizes = array -++// CHECK-SAME: }> : (tensor<128x2x4x9xi32>, tensor<4x128x5x3xi32>) -> tensor<4x128x5x8xi32> -++// CHECK-NEXT: return %[[gather]] : tensor<4x128x5x8xi32> -++func.func @gather_batching_dim_overflows_and_no_index_vector_dim(%arg0: tensor<128x2x4x9xi32>, %arg1: tensor<4x128x5xi8>) -> tensor<4x128x5x8xi32> { -++ %0 = "stablehlo.gather"(%arg0, %arg1) { -++ dimension_numbers = #stablehlo.gather< -++ offset_dims = [3], -++ collapsed_slice_dims = [1], -++ operand_batching_dims = [0, 2], -++ start_indices_batching_dims = [1, 0], -++ start_index_map = [1], -++ index_vector_dim = 3 -++ >, -++ slice_sizes = array, -++ indices_are_sorted = false -++ } : (tensor<128x2x4x9xi32>, tensor<4x128x5xi8>) -> tensor<4x128x5x8xi32> -++ func.return %0 : tensor<4x128x5x8xi32> -+ } -+ -+ // ----- -+@@ -156,7 +452,7 @@ -+ // CHECK-NO-DOWNGRADE: input_batching_dims = [0, 2] -+ // CHECK-NO-DOWNGRADE: scatter_indices_batching_dims = [1, 0] -+ %0 = "stablehlo.scatter"(%arg0, %arg1, %arg2) <{ -+- indices_are_sorted = true, -++ indices_are_sorted = false, -+ scatter_dimension_numbers = #stablehlo.scatter< -+ update_window_dims = [3], -+ inserted_window_dims = [1, 3], -+@@ -176,9 +472,9 @@ -+ // ----- -+ -+ // CHECK-LABEL: @scatter_with_batching_no_index_vector_dim -++// CHECK-NEXT: %[[reshape:.*]] = stablehlo.reshape %arg1 : (tensor<4x3x5xi32>) -> tensor<4x3x5x1xi32> -+ // CHECK-NEXT: %[[iota_dim1:.*]] = stablehlo.iota dim = 1 : tensor<4x3x5x1xi32> -+ // CHECK-NEXT: %[[iota_dim0:.*]] = stablehlo.iota dim = 0 : tensor<4x3x5x1xi32> -+-// CHECK-NEXT: %[[reshape:.*]] = stablehlo.reshape %arg1 : (tensor<4x3x5xi32>) -> tensor<4x3x5x1xi32> -+ // CHECK-NEXT: %[[concat:.*]] = stablehlo.concatenate %[[iota_dim1]], %[[iota_dim0]], %[[reshape]], dim = 3 : (tensor<4x3x5x1xi32>, tensor<4x3x5x1xi32>, tensor<4x3x5x1xi32>) -> tensor<4x3x5x3xi32> -+ // CHECK-NEXT: %[[scatter:.*]] = "stablehlo.scatter"(%arg0, %[[concat]], %arg2) <{ -+ // CHECK-SAME: indices_are_sorted = false, -+@@ -192,7 +488,7 @@ -+ // CHECK-NO-DOWNGRADE: input_batching_dims = [0, 2] -+ // CHECK-NO-DOWNGRADE: scatter_indices_batching_dims = [1, 0] -+ %0 = "stablehlo.scatter"(%arg0, %arg1, %arg2) <{ -+- indices_are_sorted = true, -++ indices_are_sorted = false, -+ scatter_dimension_numbers = #stablehlo.scatter< -+ update_window_dims = [3], -+ inserted_window_dims = [1], -+@@ -208,3 +504,60 @@ -+ }) : (tensor<3x2x4x9xi32>, tensor<4x3x5xi32>, tensor<4x3x5x8xi32>) -> tensor<3x2x4x9xi32> -+ func.return %0 : tensor<3x2x4x9xi32> -+ } -++ -++// ----- -++ -++// CHECK-LABEL: @scatter_batching_dims_indices_remain_sorted -++// CHECK-NEXT: %[[iota_dim1:.*]] = stablehlo.iota dim = 0 : tensor<2x3x5x1xi32> -++// CHECK-NEXT: %[[iota_dim0:.*]] = stablehlo.iota dim = 2 : tensor<2x3x5x1xi32> -++// CHECK-NEXT: %[[concat:.*]] = stablehlo.concatenate %[[iota_dim1]], %[[iota_dim0]], %arg1, dim = 3 : (tensor<2x3x5x1xi32>, tensor<2x3x5x1xi32>, tensor<2x3x5x2xi32>) -> tensor<2x3x5x4xi32> -++// CHECK-NEXT: %[[scatter:.*]] = "stablehlo.scatter"(%arg0, %[[concat]], %arg2) <{ -++// CHECK-SAME: indices_are_sorted = true, -++// CHECK-SAME: dimension_numbers = #stablehlo.scatter< -++// CHECK-SAME: update_window_dims = [3], inserted_window_dims = [0, 1, 2, 3], -++// CHECK-SAME: scatter_dims_to_operand_dims = [0, 1, 2, 3], index_vector_dim = 3>, -++// CHECK-SAME: unique_indices = false}> -++// CHECK: (tensor<2x5x4x7x9xi32>, tensor<2x3x5x4xi32>, tensor<2x3x5x8xi32>) -> tensor<2x5x4x7x9xi32> -++// CHECK-NEXT: return %[[scatter]] : tensor<2x5x4x7x9xi32> -++func.func @scatter_batching_dims_indices_remain_sorted(%arg0: tensor<2x5x4x7x9xi32>, %arg1: tensor<2x3x5x2xi32>, %arg2: tensor<2x3x5x8xi32>) -> tensor<2x5x4x7x9xi32> { -++ %0 = "stablehlo.scatter"(%arg0, %arg1, %arg2) <{ -++ indices_are_sorted = true, -++ scatter_dimension_numbers = #stablehlo.scatter< -++ update_window_dims = [3], -++ inserted_window_dims = [2, 3], -++ input_batching_dims = [0, 1], -++ scatter_indices_batching_dims = [0, 2], -++ scatter_dims_to_operand_dims = [2, 3], -++ index_vector_dim = 3 -++ >, -++ unique_indices = false -++ }> ({ -++ ^bb0(%arg3: tensor, %arg4: tensor): -++ stablehlo.return %arg4 : tensor -++ }) : (tensor<2x5x4x7x9xi32>, tensor<2x3x5x2xi32>, tensor<2x3x5x8xi32>) -> tensor<2x5x4x7x9xi32> -++ func.return %0 : tensor<2x5x4x7x9xi32> -++} -++ -++// ----- -++ -++// CHECK-LABEL: @scatter_batching_dim_dynamic_scatter_indices -++// CHECK: input_batching_dims = [0, 2] -++// CHECK: scatter_indices_batching_dims = [1, 0] -++func.func @scatter_batching_dim_dynamic_scatter_indices(%arg0: tensor, %arg1: tensor<4x?x5x2xi32>, %arg2: tensor<4x?x5x8xi32>) -> tensor { -++ %0 = "stablehlo.scatter"(%arg0, %arg1, %arg2) <{ -++ indices_are_sorted = false, -++ scatter_dimension_numbers = #stablehlo.scatter< -++ update_window_dims = [3], -++ inserted_window_dims = [1, 3], -++ input_batching_dims = [0, 2], -++ scatter_indices_batching_dims = [1, 0], -++ scatter_dims_to_operand_dims = [1, 3], -++ index_vector_dim = 3 -++ >, -++ unique_indices = false -++ }> ({ -++ ^bb0(%arg3: tensor, %arg4: tensor): -++ stablehlo.return %arg4 : tensor -++ }) : (tensor, tensor<4x?x5x2xi32>, tensor<4x?x5x8xi32>) -> tensor -++ func.return %0 : tensor -++} -+diff --ruN a/stablehlo/stablehlo/tools/StablehloTranslateMain.cpp b/stablehlo/stablehlo/tools/StablehloTranslateMain.cpp -+--- stablehlo/stablehlo/tools/StablehloTranslateMain.cpp -++++ stablehlo/stablehlo/tools/StablehloTranslateMain.cpp -+@@ -24,7 +24,7 @@ -+ #include "llvm/Support/ErrorHandling.h" -+ #include "llvm/Support/LogicalResult.h" -+ #include "mlir/Dialect/Func/IR/FuncOps.h" -+-#include "mlir/Dialect/Quant/QuantOps.h" -++#include "mlir/Dialect/Quant/IR/Quant.h" -+ #include "mlir/IR/BuiltinAttributes.h" -+ #include "mlir/IR/BuiltinOps.h" -+ #include "mlir/IR/DialectRegistry.h" -+@@ -237,7 +237,7 @@ -+ }, -+ [](DialectRegistry ®istry) { -+ registry.insert(); -+- registry.insert(); -++ registry.insert(); -+ registry.insert(); -+ registry.insert(); -+ registry.insert(); -+diff --ruN a/stablehlo/stablehlo/transforms/Passes.h b/stablehlo/stablehlo/transforms/Passes.h -+--- stablehlo/stablehlo/transforms/Passes.h -++++ stablehlo/stablehlo/transforms/Passes.h -+@@ -19,7 +19,7 @@ -+ #include -+ -+ #include "mlir/Dialect/Func/IR/FuncOps.h" -+-#include "mlir/Dialect/Quant/QuantOps.h" -++#include "mlir/Dialect/Quant/IR/Quant.h" -+ #include "mlir/Dialect/Shape/IR/Shape.h" -+ #include "mlir/IR/BuiltinOps.h" -+ #include "mlir/Pass/Pass.h" -+diff --ruN a/stablehlo/stablehlo/transforms/Passes.td b/stablehlo/stablehlo/transforms/Passes.td -+--- stablehlo/stablehlo/transforms/Passes.td -++++ stablehlo/stablehlo/transforms/Passes.td -+@@ -68,7 +68,7 @@ -+ let summary = "Legalize VHLO to StableHLO."; -+ let dependentDialects = [ -+ "mlir::func::FuncDialect", -+- "mlir::quant::QuantizationDialect", -++ "mlir::quant::QuantDialect", -+ "mlir::shape::ShapeDialect", -+ "mlir::stablehlo::StablehloDialect", -+ ]; -+diff --ruN a/stablehlo/stablehlo/transforms/StablehloCreateCompatibilityExpander.cpp b/stablehlo/stablehlo/transforms/StablehloCreateCompatibilityExpander.cpp -+--- stablehlo/stablehlo/transforms/StablehloCreateCompatibilityExpander.cpp -++++ stablehlo/stablehlo/transforms/StablehloCreateCompatibilityExpander.cpp -+@@ -22,8 +22,11 @@ -+ #include "llvm/ADT/STLExtras.h" -+ #include "llvm/ADT/SmallVector.h" -+ #include "llvm/Support/ErrorHandling.h" -++#include "llvm/Support/MathExtras.h" -+ #include "mlir/Dialect/Func/IR/FuncOps.h" -++#include "mlir/IR/Builders.h" -+ #include "mlir/IR/BuiltinAttributes.h" -++#include "mlir/IR/BuiltinTypeInterfaces.h" -+ #include "mlir/IR/BuiltinTypes.h" -+ #include "mlir/IR/Diagnostics.h" -+ #include "mlir/IR/PatternMatch.h" -+@@ -75,6 +78,42 @@ -+ return result; -+ } -+ -++bool fitsInIntegralType(int64_t size, IntegerType type) { -++ if (type.isUnsigned()) { -++ return llvm::isUIntN(type.getWidth(), size); -++ } else { -++ return llvm::isIntN(type.getWidth(), size); -++ } -++} -++ -++// If `type` is an integer type in which `size` doesn't fit, promote it to i32 -++// or i64 (depending on `size`). -++Type promoteTypeForSize(Type type, int64_t size, OpBuilder &builder) { -++ // Gather/Scatter should have an integer type, but we check just in case. -++ auto intType = dyn_cast(type); -++ if (!intType || fitsInIntegralType(size, intType)) { -++ return type; -++ } -++ if (fitsInIntegralType(size, builder.getI32Type())) { -++ return builder.getI32Type(); -++ } -++ return builder.getI64Type(); -++} -++ -++// If `indices_batching_dims` and `updated_index_map` are both sorted, then the -++// `indices_are_sorted` property is preserved. -++// -++// This is because each concatenated iota is monotonically increasing, sorted -++// indices batching dims mean their order corresponds to the order of batching -++// dims in the operand, and a sorted updated start index map means the order of -++// the index vector dim corresponds to the order of operand dims. -++bool getUpdatedIndicesAreSorted(bool indices_are_sorted, -++ ArrayRef indices_batching_dims, -++ ArrayRef updated_index_map) { -++ return indices_are_sorted && llvm::is_sorted(indices_batching_dims) && -++ llvm::is_sorted(updated_index_map); -++} -++ -+ // Returns an updated indices tensor such that an `IotaOp` is prepended for each -+ // dim in `indicesBatchingDims` with a `ConcatenateOp`. -+ // -+@@ -85,16 +124,31 @@ -+ PatternRewriter &rewriter) { -+ Location loc = indices.getLoc(); -+ auto indicesType = cast(indices.getType()); -++ Type elementType = indicesType.getElementType(); -++ -++ // The batching dim sizes might not fit in the existing element type, -++ // in which case we need to promote it. -++ for (int64_t batchingDim : indicesBatchingDims) { -++ elementType = promoteTypeForSize( -++ elementType, indicesType.getDimSize(batchingDim), rewriter); -++ } -++ if (elementType != indicesType.getElementType()) { -++ indicesType = RankedTensorType::get(indicesType.getShape(), elementType); -++ indices = rewriter.create(loc, indicesType, indices); -++ } -++ -+ bool indexVectorDimOnLastDim = indexVectorDim == indicesType.getRank(); -+- -+ SmallVector iotaShape(indicesType.getShape()); -+ if (indexVectorDimOnLastDim) { -+ iotaShape.push_back(1); -+ } else { -+ iotaShape[indexVectorDim] = 1; -+ } -+- auto iotaType = -+- RankedTensorType::get(iotaShape, indicesType.getElementType()); -++ auto iotaType = RankedTensorType::get(iotaShape, elementType); -++ -++ if (indexVectorDimOnLastDim) { -++ indices = rewriter.create(loc, iotaType, indices); -++ } -+ -+ SmallVector indicesToConcat; -+ indicesToConcat.reserve(indicesBatchingDims.size() + 1); -+@@ -102,12 +156,7 @@ -+ indicesToConcat.push_back( -+ rewriter.create(loc, iotaType, batchingDim)); -+ } -+- if (indexVectorDimOnLastDim) { -+- indicesToConcat.push_back( -+- rewriter.create(loc, iotaType, indices)); -+- } else { -+- indicesToConcat.push_back(indices); -+- } -++ indicesToConcat.push_back(indices); -+ return rewriter.create(loc, indicesToConcat, indexVectorDim); -+ } -+ -+@@ -125,9 +174,17 @@ -+ PatternRewriter &rewriter) const override { -+ GatherDimensionNumbersAttr dimNumbers = op.getDimensionNumbers(); -+ ArrayRef operandBatchingDims = dimNumbers.getOperandBatchingDims(); -++ ArrayRef startIndicesBatchingDims = -++ dimNumbers.getStartIndicesBatchingDims(); -+ if (operandBatchingDims.empty()) { -+ return rewriter.notifyMatchFailure(op, [](Diagnostic &diag) { -+ diag << "gather op has no batching dims"; -++ }); -++ } -++ -++ if (!op.getStartIndices().getType().hasStaticShape()) { -++ return rewriter.notifyMatchFailure(op, [](Diagnostic &diag) { -++ diag << "gather op has start indices with dynamic shape, can't expand"; -+ }); -+ } -+ -+@@ -136,16 +193,18 @@ -+ SmallVector newStartIndexMap = -+ llvm::to_vector(llvm::concat( -+ operandBatchingDims, dimNumbers.getStartIndexMap())); -+- Value newIndices = createConcatIndices( -+- op.getStartIndices(), dimNumbers.getIndexVectorDim(), -+- dimNumbers.getStartIndicesBatchingDims(), rewriter); -++ Value newIndices = createConcatIndices(op.getStartIndices(), -++ dimNumbers.getIndexVectorDim(), -++ startIndicesBatchingDims, rewriter); -+ rewriter.replaceOpWithNewOp( -+ op, op.getOperand(), newIndices, -+ GatherDimensionNumbersAttr::get( -+ op.getContext(), dimNumbers.getOffsetDims(), newCollapsedSliceDims, -+ /*operandBatchingDims=*/{}, /*startIndicesBatchingDims=*/{}, -+ newStartIndexMap, dimNumbers.getIndexVectorDim()), -+- op.getSliceSizes(), /*indicesAreSorted=*/false); -++ op.getSliceSizes(), -++ getUpdatedIndicesAreSorted(op.getIndicesAreSorted(), -++ startIndicesBatchingDims, newStartIndexMap)); -+ -+ return success(); -+ } -+@@ -161,9 +220,17 @@ -+ PatternRewriter &rewriter) const override { -+ ScatterDimensionNumbersAttr dimNumbers = op.getScatterDimensionNumbers(); -+ ArrayRef inputBatchingDims = dimNumbers.getInputBatchingDims(); -++ ArrayRef scatterIndicesBatchingDims = -++ dimNumbers.getScatterIndicesBatchingDims(); -+ if (inputBatchingDims.empty()) { -+ return rewriter.notifyMatchFailure(op, [](Diagnostic &diag) { -+ diag << "scatter op has no batching dims"; -++ }); -++ } -++ -++ if (!op.getScatterIndices().getType().hasStaticShape()) { -++ return rewriter.notifyMatchFailure(op, [](Diagnostic &diag) { -++ diag << "gather op has start indices with dynamic shape, can't expand"; -+ }); -+ } -+ -+@@ -174,7 +241,7 @@ -+ inputBatchingDims, dimNumbers.getScatterDimsToOperandDims())); -+ Value newIndices = createConcatIndices( -+ op.getScatterIndices(), dimNumbers.getIndexVectorDim(), -+- dimNumbers.getScatterIndicesBatchingDims(), rewriter); -++ scatterIndicesBatchingDims, rewriter); -+ auto newScatterOp = rewriter.create( -+ op.getLoc(), op->getResultTypes(), op.getInputs(), newIndices, -+ op.getUpdates(), -+@@ -183,7 +250,10 @@ -+ newInsertedWindowDims, -+ /*inputBatchingDims=*/{}, /*scatterIndicesBatchingDims=*/{}, -+ newScatterDimsToOperandDims, dimNumbers.getIndexVectorDim()), -+- /*indicesAreSorted=*/false, op.getUniqueIndices()); -++ getUpdatedIndicesAreSorted(op.getIndicesAreSorted(), -++ scatterIndicesBatchingDims, -++ newScatterDimsToOperandDims), -++ op.getUniqueIndices()); -+ -+ newScatterOp.getUpdateComputation().takeBody(op.getUpdateComputation()); -+ rewriter.replaceOp(op, newScatterOp.getResults()); -+diff --ruN a/stablehlo/stablehlo/transforms/StablehloLegalizeQDQToQuantizedOp.cpp b/stablehlo/stablehlo/transforms/StablehloLegalizeQDQToQuantizedOp.cpp -+--- stablehlo/stablehlo/transforms/StablehloLegalizeQDQToQuantizedOp.cpp -++++ stablehlo/stablehlo/transforms/StablehloLegalizeQDQToQuantizedOp.cpp -+@@ -15,7 +15,7 @@ -+ -+ #include "llvm/ADT/SmallVector.h" -+ #include "mlir/Dialect/Func/IR/FuncOps.h" -+-#include "mlir/Dialect/Quant/QuantTypes.h" -++#include "mlir/Dialect/Quant/IR/QuantTypes.h" -+ #include "mlir/IR/Operation.h" -+ #include "mlir/IR/PatternMatch.h" -+ #include "mlir/Transforms/DialectConversion.h" // Include for TypeConverter -+diff --ruN a/stablehlo/stablehlo/transforms/StablehloLegalizeQuantToMath.cpp b/stablehlo/stablehlo/transforms/StablehloLegalizeQuantToMath.cpp -+--- stablehlo/stablehlo/transforms/StablehloLegalizeQuantToMath.cpp -++++ stablehlo/stablehlo/transforms/StablehloLegalizeQuantToMath.cpp -+@@ -24,8 +24,8 @@ -+ #include "llvm/ADT/SmallVector.h" -+ #include "mlir/Dialect/Func/IR/FuncOps.h" -+ #include "mlir/Dialect/Func/Transforms/FuncConversions.h" -+-#include "mlir/Dialect/Quant/QuantOps.h" -+-#include "mlir/Dialect/Quant/QuantTypes.h" -++#include "mlir/Dialect/Quant/IR/Quant.h" -++#include "mlir/Dialect/Quant/IR/QuantTypes.h" -+ #include "mlir/IR/Attributes.h" -+ #include "mlir/IR/BuiltinAttributes.h" -+ #include "mlir/IR/BuiltinTypeInterfaces.h" -+@@ -1331,7 +1331,7 @@ -+ populateReturnOpTypeConversionPattern(patterns, converter); -+ -+ ConversionTarget target(*op->getContext()); -+- target.addIllegalDialect(); -++ target.addIllegalDialect(); -+ auto isLegal = [&converter](Operation *op) { -+ return converter.isLegal(op); -+ }; -+diff --ruN a/stablehlo/stablehlo/transforms/StablehloLegalizeQuantizedOpToQDQ.cpp b/stablehlo/stablehlo/transforms/StablehloLegalizeQuantizedOpToQDQ.cpp -+--- stablehlo/stablehlo/transforms/StablehloLegalizeQuantizedOpToQDQ.cpp -++++ stablehlo/stablehlo/transforms/StablehloLegalizeQuantizedOpToQDQ.cpp -+@@ -17,7 +17,7 @@ -+ -+ #include "llvm/ADT/STLExtras.h" -+ #include "mlir/Dialect/Func/IR/FuncOps.h" -+-#include "mlir/Dialect/Quant/QuantTypes.h" -++#include "mlir/Dialect/Quant/IR/QuantTypes.h" -+ #include "mlir/IR/BuiltinTypeInterfaces.h" -+ #include "mlir/IR/PatternMatch.h" -+ #include "mlir/IR/TypeRange.h" - diff --git a/third_party/stablehlo/workspace.bzl b/third_party/stablehlo/workspace.bzl -index 2e87599..0a9d3d0 100644 +index 2e87599..1aa833a 100644 --- a/third_party/stablehlo/workspace.bzl +++ b/third_party/stablehlo/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls") @@ -6230,8 +6125,8 @@ index 2e87599..0a9d3d0 100644 # - STABLEHLO_COMMIT = "ca13d31b5ed0b2053dde0a624480ad765e219ebf" - STABLEHLO_SHA256 = "123462093f087f2576bb6a6cc471370eed2d43c291f881ff359fd4ca812003db" -+ STABLEHLO_COMMIT = "9d9290dc2308c1850cea69ea05f8c94017e484ee" -+ STABLEHLO_SHA256 = "29803fc8a3a96f9e5469c7ab51f2ff4292dc2419c17bd0466f5d15a448cf6815" ++ STABLEHLO_COMMIT = "f7f8e4e35296deeff2e12e39421ac8d9599ba340" ++ STABLEHLO_SHA256 = "c92b55d5512e58d6fefba62c58e60d7762adb184dc3ad489521de562f6ca7aeb" # tf_http_archive( diff --git a/third_party/tsl/third_party/llvm/generated.patch b/third_party/tsl/third_party/llvm/generated.patch index 1bea5353eeed4..155d3f2cc1ec4 100644 --- a/third_party/tsl/third_party/llvm/generated.patch +++ b/third_party/tsl/third_party/llvm/generated.patch @@ -1,78 +1,901 @@ Auto generated patch. Do not edit or delete it, even if empty. -diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch1/include/toy/Lexer.h b/mlir/examples/toy/Ch1/include/toy/Lexer.h ---- a/mlir/examples/toy/Ch1/include/toy/Lexer.h -+++ b/mlir/examples/toy/Ch1/include/toy/Lexer.h -@@ -15,6 +15,7 @@ +diff -ruN --strip-trailing-cr a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp +--- a/clang/lib/CodeGen/CGDeclCXX.cpp ++++ b/clang/lib/CodeGen/CGDeclCXX.cpp +@@ -640,13 +640,13 @@ + addUsedGlobal(COMDATKey); + } - #include "llvm/ADT/StringRef.h" +- // If comdats are in use and supported, place the initializer function into +- // the comdat group of the global. In the MS ABI, initializers are mangled +- // and have their own comdat, so we don't include them in the group for +- // consistency with MSVC. ++ // If we used a COMDAT key for the global ctor, the init function can be ++ // discarded if the global ctor entry is discarded. ++ // FIXME: Do we need to restrict this to ELF and Wasm? + llvm::Comdat *C = Addr->getComdat(); +- if (COMDATKey && C && getTriple().supportsCOMDAT() && +- !getTarget().getCXXABI().isMicrosoft()) { ++ if (COMDATKey && C && ++ (getTarget().getTriple().isOSBinFormatELF() || ++ getTarget().getTriple().isOSBinFormatWasm())) { + Fn->setComdat(C); + } + } else { +diff -ruN --strip-trailing-cr a/libcxx/docs/Status/Cxx23Issues.csv b/libcxx/docs/Status/Cxx23Issues.csv +--- a/libcxx/docs/Status/Cxx23Issues.csv ++++ b/libcxx/docs/Status/Cxx23Issues.csv +@@ -296,7 +296,7 @@ + "`LWG3862 `__","``basic_const_iterator``'s ``common_type`` specialization is underconstrained","2023-02 (Issaquah)","","","" + "`LWG3865 `__","Sorting a range of ``pairs``","2023-02 (Issaquah)","|Complete|","17.0","" + "`LWG3869 `__","Deprecate ``std::errc`` constants related to UNIX STREAMS","2023-02 (Issaquah)","|Complete|","19.0","" +-"`LWG3870 `__","Remove ``voidify``","2023-02 (Issaquah)","|Complete|","20.0","" ++"`LWG3870 `__","Remove ``voidify``","2023-02 (Issaquah)","","","" + "`LWG3871 `__","Adjust note about ``terminate``","2023-02 (Issaquah)","","","" + "`LWG3872 `__","``basic_const_iterator`` should have custom ``iter_move``","2023-02 (Issaquah)","","","" + "`LWG3875 `__","``std::ranges::repeat_view::iterator`` may be ill-formed","2023-02 (Issaquah)","|Complete|","17.0","" +diff -ruN --strip-trailing-cr a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt +--- a/libcxx/include/CMakeLists.txt ++++ b/libcxx/include/CMakeLists.txt +@@ -560,6 +560,7 @@ + __memory/unique_temporary_buffer.h + __memory/uses_allocator.h + __memory/uses_allocator_construction.h ++ __memory/voidify.h + __memory_resource/memory_resource.h + __memory_resource/monotonic_buffer_resource.h + __memory_resource/polymorphic_allocator.h +diff -ruN --strip-trailing-cr a/libcxx/include/__memory/construct_at.h b/libcxx/include/__memory/construct_at.h +--- a/libcxx/include/__memory/construct_at.h ++++ b/libcxx/include/__memory/construct_at.h +@@ -14,6 +14,7 @@ + #include <__config> + #include <__iterator/access.h> + #include <__memory/addressof.h> ++#include <__memory/voidify.h> + #include <__type_traits/enable_if.h> + #include <__type_traits/is_array.h> + #include <__utility/declval.h> +@@ -37,7 +38,7 @@ + template ()) _Tp(std::declval<_Args>()...))> + _LIBCPP_HIDE_FROM_ABI constexpr _Tp* construct_at(_Tp* __location, _Args&&... __args) { + _LIBCPP_ASSERT_NON_NULL(__location != nullptr, "null pointer given to construct_at"); +- return ::new (static_cast(__location)) _Tp(std::forward<_Args>(__args)...); ++ return ::new (std::__voidify(*__location)) _Tp(std::forward<_Args>(__args)...); + } -+#include - #include - #include + #endif +@@ -48,7 +49,7 @@ + return std::construct_at(__location, std::forward<_Args>(__args)...); + #else + return _LIBCPP_ASSERT_NON_NULL(__location != nullptr, "null pointer given to construct_at"), +- ::new (static_cast(__location)) _Tp(std::forward<_Args>(__args)...); ++ ::new (std::__voidify(*__location)) _Tp(std::forward<_Args>(__args)...); + #endif + } -diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch2/include/toy/Lexer.h b/mlir/examples/toy/Ch2/include/toy/Lexer.h ---- a/mlir/examples/toy/Ch2/include/toy/Lexer.h -+++ b/mlir/examples/toy/Ch2/include/toy/Lexer.h -@@ -15,6 +15,7 @@ +diff -ruN --strip-trailing-cr a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h +--- a/libcxx/include/__memory/shared_ptr.h ++++ b/libcxx/include/__memory/shared_ptr.h +@@ -248,35 +248,33 @@ - #include "llvm/ADT/StringRef.h" + template + struct __shared_ptr_emplace : __shared_weak_count { +- using __value_type = __remove_cv_t<_Tp>; +- + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI explicit __shared_ptr_emplace(_Alloc __a, _Args&&...) : __storage_(std::move(__a)) { + static_assert( + sizeof...(_Args) == 0, "No argument should be provided to the control block when using _for_overwrite"); +- ::new (static_cast(__get_elem())) __value_type; ++ ::new ((void*)__get_elem()) _Tp; + } -+#include - #include - #include + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI explicit __shared_ptr_emplace(_Alloc __a, _Args&&... __args) : __storage_(std::move(__a)) { +- using _TpAlloc = typename __allocator_traits_rebind<_Alloc, __value_type>::type; ++ using _TpAlloc = typename __allocator_traits_rebind<_Alloc, __remove_cv_t<_Tp> >::type; + _TpAlloc __tmp(*__get_alloc()); + allocator_traits<_TpAlloc>::construct(__tmp, __get_elem(), std::forward<_Args>(__args)...); + } -diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch3/include/toy/Lexer.h b/mlir/examples/toy/Ch3/include/toy/Lexer.h ---- a/mlir/examples/toy/Ch3/include/toy/Lexer.h -+++ b/mlir/examples/toy/Ch3/include/toy/Lexer.h -@@ -15,6 +15,7 @@ + _LIBCPP_HIDE_FROM_ABI _Alloc* __get_alloc() _NOEXCEPT { return __storage_.__get_alloc(); } - #include "llvm/ADT/StringRef.h" +- _LIBCPP_HIDE_FROM_ABI __value_type* __get_elem() _NOEXCEPT { return __storage_.__get_elem(); } ++ _LIBCPP_HIDE_FROM_ABI _Tp* __get_elem() _NOEXCEPT { return __storage_.__get_elem(); } -+#include - #include - #include + private: + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI void __on_zero_shared_impl() _NOEXCEPT { +- __get_elem()->~__value_type(); ++ __get_elem()->~_Tp(); + } -diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch4/include/toy/Lexer.h b/mlir/examples/toy/Ch4/include/toy/Lexer.h ---- a/mlir/examples/toy/Ch4/include/toy/Lexer.h -+++ b/mlir/examples/toy/Ch4/include/toy/Lexer.h -@@ -15,6 +15,7 @@ + template (__buffer_)->__alloc_); + } -+#include - #include - #include +- _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_CFI __value_type* __get_elem() _NOEXCEPT { ++ _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_CFI _Tp* __get_elem() _NOEXCEPT { + return std::addressof(reinterpret_cast<_Data*>(__buffer_)->__elem_); + } + }; +diff -ruN --strip-trailing-cr a/libcxx/include/__memory/uninitialized_algorithms.h b/libcxx/include/__memory/uninitialized_algorithms.h +--- a/libcxx/include/__memory/uninitialized_algorithms.h ++++ b/libcxx/include/__memory/uninitialized_algorithms.h +@@ -21,6 +21,7 @@ + #include <__memory/allocator_traits.h> + #include <__memory/construct_at.h> + #include <__memory/pointer_traits.h> ++#include <__memory/voidify.h> + #include <__type_traits/enable_if.h> + #include <__type_traits/extent.h> + #include <__type_traits/is_array.h> +@@ -63,7 +64,7 @@ + try { + #endif + for (; __ifirst != __ilast && !__stop_copying(__idx); ++__ifirst, (void)++__idx) +- ::new (static_cast(std::addressof(*__idx))) _ValueType(*__ifirst); ++ ::new (std::__voidify(*__idx)) _ValueType(*__ifirst); + #ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__ofirst, __idx); +@@ -93,7 +94,7 @@ + try { + #endif + for (; __n > 0 && !__stop_copying(__idx); ++__ifirst, (void)++__idx, (void)--__n) +- ::new (static_cast(std::addressof(*__idx))) _ValueType(*__ifirst); ++ ::new (std::__voidify(*__idx)) _ValueType(*__ifirst); + #ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__ofirst, __idx); +@@ -123,7 +124,7 @@ + try { + #endif + for (; __idx != __last; ++__idx) +- ::new (static_cast(std::addressof(*__idx))) _ValueType(__x); ++ ::new (std::__voidify(*__idx)) _ValueType(__x); + #ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__first, __idx); +@@ -151,7 +152,7 @@ + try { + #endif + for (; __n > 0; ++__idx, (void)--__n) +- ::new (static_cast(std::addressof(*__idx))) _ValueType(__x); ++ ::new (std::__voidify(*__idx)) _ValueType(__x); + #ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__first, __idx); +@@ -181,7 +182,7 @@ + try { + # endif + for (; __idx != __last; ++__idx) +- ::new (static_cast(std::addressof(*__idx))) _ValueType; ++ ::new (std::__voidify(*__idx)) _ValueType; + # ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__first, __idx); +@@ -207,7 +208,7 @@ + try { + # endif + for (; __n > 0; ++__idx, (void)--__n) +- ::new (static_cast(std::addressof(*__idx))) _ValueType; ++ ::new (std::__voidify(*__idx)) _ValueType; + # ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__first, __idx); +@@ -234,7 +235,7 @@ + try { + # endif + for (; __idx != __last; ++__idx) +- ::new (static_cast(std::addressof(*__idx))) _ValueType(); ++ ::new (std::__voidify(*__idx)) _ValueType(); + # ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__first, __idx); +@@ -260,7 +261,7 @@ + try { + # endif + for (; __n > 0; ++__idx, (void)--__n) +- ::new (static_cast(std::addressof(*__idx))) _ValueType(); ++ ::new (std::__voidify(*__idx)) _ValueType(); + # ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__first, __idx); +@@ -296,7 +297,7 @@ + try { + # endif + for (; __ifirst != __ilast && !__stop_moving(__idx); ++__idx, (void)++__ifirst) { +- ::new (static_cast(std::addressof(*__idx))) _ValueType(__iter_move(__ifirst)); ++ ::new (std::__voidify(*__idx)) _ValueType(__iter_move(__ifirst)); + } + # ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { +@@ -334,7 +335,7 @@ + try { + # endif + for (; __n > 0 && !__stop_moving(__idx); ++__idx, (void)++__ifirst, --__n) +- ::new (static_cast(std::addressof(*__idx))) _ValueType(__iter_move(__ifirst)); ++ ::new (std::__voidify(*__idx)) _ValueType(__iter_move(__ifirst)); + # ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (...) { + std::__destroy(__ofirst, __idx); +diff -ruN --strip-trailing-cr a/libcxx/include/__memory/voidify.h b/libcxx/include/__memory/voidify.h +--- a/libcxx/include/__memory/voidify.h ++++ b/libcxx/include/__memory/voidify.h +@@ -0,0 +1,30 @@ ++// -*- C++ -*- ++//===----------------------------------------------------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef _LIBCPP___MEMORY_VOIDIFY_H ++#define _LIBCPP___MEMORY_VOIDIFY_H ++ ++#include <__config> ++#include <__memory/addressof.h> ++ ++#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) ++# pragma GCC system_header ++#endif ++ ++_LIBCPP_BEGIN_NAMESPACE_STD ++ ++template ++_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void* __voidify(_Tp& __from) { ++ // Cast away cv-qualifiers to allow modifying elements of a range through const iterators. ++ return const_cast(static_cast(std::addressof(__from))); ++} ++ ++_LIBCPP_END_NAMESPACE_STD ++ ++#endif // _LIBCPP___MEMORY_VOIDIFY_H +diff -ruN --strip-trailing-cr a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap +--- a/libcxx/include/module.modulemap ++++ b/libcxx/include/module.modulemap +@@ -1528,6 +1528,7 @@ + } + module uses_allocator { header "__memory/uses_allocator.h" } + module uses_allocator_construction { header "__memory/uses_allocator_construction.h" } ++ module voidify { header "__memory/voidify.h" } -diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch5/include/toy/Lexer.h b/mlir/examples/toy/Ch5/include/toy/Lexer.h ---- a/mlir/examples/toy/Ch5/include/toy/Lexer.h -+++ b/mlir/examples/toy/Ch5/include/toy/Lexer.h -@@ -15,6 +15,7 @@ + header "memory" + export * +diff -ruN --strip-trailing-cr a/libcxx/include/optional b/libcxx/include/optional +--- a/libcxx/include/optional ++++ b/libcxx/include/optional +@@ -287,7 +287,7 @@ + static_assert(is_object_v, "instantiation of optional with a non-object type is undefined behavior"); + union { + char __null_state_; +- remove_cv_t __val_; ++ value_type __val_; + }; + bool __engaged_; - #include "llvm/ADT/StringRef.h" +@@ -323,7 +323,7 @@ + static_assert(is_object_v, "instantiation of optional with a non-object type is undefined behavior"); + union { + char __null_state_; +- remove_cv_t __val_; ++ value_type __val_; + }; + bool __engaged_; -+#include - #include - #include +@@ -377,7 +377,7 @@ + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __assign_from(_That&& __opt) { + if (this->__engaged_ == __opt.has_value()) { + if (this->__engaged_) +- static_cast<_Tp&>(this->__val_) = std::forward<_That>(__opt).__get(); ++ this->__val_ = std::forward<_That>(__opt).__get(); + } else { + if (this->__engaged_) + this->reset(); +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/construct_at.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/construct_at.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/construct_at.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/construct_at.pass.cpp +@@ -80,6 +80,21 @@ + a.deallocate(p, 2); + } -diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch6/include/toy/Lexer.h b/mlir/examples/toy/Ch6/include/toy/Lexer.h ---- a/mlir/examples/toy/Ch6/include/toy/Lexer.h -+++ b/mlir/examples/toy/Ch6/include/toy/Lexer.h -@@ -15,6 +15,7 @@ ++ { ++ std::allocator a; ++ Counted const* p = a.allocate(2); ++ int count = 0; ++ std::construct_at(p, count); ++ assert(count == 1); ++ std::construct_at(p+1, count); ++ assert(count == 2); ++ (p+1)->~Counted(); ++ assert(count == 1); ++ p->~Counted(); ++ assert(count == 0); ++ a.deallocate(const_cast(p), 2); ++ } ++ + return true; + } - #include "llvm/ADT/StringRef.h" +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/ranges_construct_at.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/ranges_construct_at.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/ranges_construct_at.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.construct/ranges_construct_at.pass.cpp +@@ -99,6 +99,16 @@ + alloc.deallocate(out, 2); + } -+#include - #include - #include ++ // Works with const pointers. ++ { ++ int x = 1; ++ const int* ptr = &x; ++ ++ const int* result = std::ranges::construct_at(ptr, 42); ++ assert(result == ptr); ++ assert(x == 42); ++ } ++ + return true; + } -diff -ruN --strip-trailing-cr a/mlir/examples/toy/Ch7/include/toy/Lexer.h b/mlir/examples/toy/Ch7/include/toy/Lexer.h ---- a/mlir/examples/toy/Ch7/include/toy/Lexer.h -+++ b/mlir/examples/toy/Ch7/include/toy/Lexer.h -@@ -15,6 +15,7 @@ +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct_n.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct_n.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct_n.pass.cpp +@@ -75,5 +75,17 @@ + } + #endif // TEST_HAS_NO_EXCEPTIONS - #include "llvm/ADT/StringRef.h" ++ // Works with const iterators. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ ++ std::ranges::uninitialized_default_construct_n(buf.cbegin(), N); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ + return 0; + } +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.default/ranges_uninitialized_default_construct.pass.cpp +@@ -163,5 +163,30 @@ + } + #endif // TEST_HAS_NO_EXCEPTIONS -+#include - #include - #include ++ // Works with const iterators, (iter, sentinel) overload. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ ++ std::ranges::uninitialized_default_construct(buf.cbegin(), buf.cend()); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ ++ // Works with const iterators, (range) overload. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ auto range = std::ranges::subrange(buf.cbegin(), buf.cend()); ++ ++ std::ranges::uninitialized_default_construct(range); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ + return 0; + } +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct_n.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct_n.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct_n.pass.cpp +@@ -94,5 +94,17 @@ + } + #endif // TEST_HAS_NO_EXCEPTIONS ++ // Works with const iterators. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ ++ std::ranges::uninitialized_value_construct_n(buf.cbegin(), N); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ + return 0; + } +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.construct.value/ranges_uninitialized_value_construct.pass.cpp +@@ -183,5 +183,30 @@ + } + #endif // TEST_HAS_NO_EXCEPTIONS + ++ // Works with const iterators, (iter, sentinel) overload. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ ++ std::ranges::uninitialized_value_construct(buf.cbegin(), buf.cend()); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ ++ // Works with const iterators, (range) overload. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ ++ auto range = std::ranges::subrange(buf.cbegin(), buf.cend()); ++ std::ranges::uninitialized_value_construct(range); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ + return 0; + } +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp +@@ -104,6 +104,22 @@ + + #endif // TEST_HAS_NO_EXCEPTIONS + ++ // Works with const iterators. ++ { ++ constexpr int N = 5; ++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; ++ Buffer out; ++ Counted::reset(); ++ ++ std::ranges::uninitialized_copy_n(in, N, out.cbegin(), out.cend()); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::equal(in, in + N, out.begin(), out.end())); ++ ++ std::destroy(out.begin(), out.end()); ++ } ++ Counted::reset(); ++ + // Conversions. + { + constexpr int N = 3; +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp +@@ -278,6 +278,39 @@ + Counted::reset(); + #endif // TEST_HAS_NO_EXCEPTIONS + ++ // Works with const iterators, (iter, sentinel) overload. ++ { ++ constexpr int N = 5; ++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; ++ Buffer out; ++ Counted::reset(); ++ ++ std::ranges::uninitialized_copy(in, in + N, out.cbegin(), out.cend()); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::equal(in, in + N, out.begin(), out.end())); ++ ++ std::destroy(out.begin(), out.end()); ++ } ++ Counted::reset(); ++ ++ // Works with const iterators, (range) overload. ++ { ++ constexpr int N = 5; ++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; ++ Buffer out; ++ Counted::reset(); ++ ++ std::ranges::subrange out_range(out.cbegin(), out.cend()); ++ std::ranges::uninitialized_copy(in, out_range); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::equal(in, in + N, out.begin(), out.end())); ++ ++ std::destroy(out.begin(), out.end()); ++ } ++ Counted::reset(); ++ + // Conversions, (iter, sentinel) overload. + { + constexpr int N = 3; +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill/ranges_uninitialized_fill.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill/ranges_uninitialized_fill.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill/ranges_uninitialized_fill.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill/ranges_uninitialized_fill.pass.cpp +@@ -198,5 +198,34 @@ + } + #endif // TEST_HAS_NO_EXCEPTIONS + ++ // Works with const iterators, (iter, sentinel) overload. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ ++ std::ranges::uninitialized_fill(buf.cbegin(), buf.cend(), x); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::all_of(buf.begin(), buf.end(), pred)); ++ ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ ++ // Works with const iterators, (range) overload. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ ++ auto range = std::ranges::subrange(buf.cbegin(), buf.cend()); ++ std::ranges::uninitialized_fill(range, x); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::all_of(buf.begin(), buf.end(), pred)); ++ ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ + return 0; + } +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill.n/ranges_uninitialized_fill_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill.n/ranges_uninitialized_fill_n.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill.n/ranges_uninitialized_fill_n.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill.n/ranges_uninitialized_fill_n.pass.cpp +@@ -101,5 +101,19 @@ + } + #endif // TEST_HAS_NO_EXCEPTIONS + ++ // Works with const iterators. ++ { ++ constexpr int N = 5; ++ Buffer buf; ++ ++ std::ranges::uninitialized_fill_n(buf.cbegin(), N, x); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::all_of(buf.begin(), buf.end(), pred)); ++ ++ std::destroy(buf.begin(), buf.end()); ++ Counted::reset(); ++ } ++ + return 0; + } +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp +@@ -105,6 +105,22 @@ + + #endif // TEST_HAS_NO_EXCEPTIONS + ++ // Works with const iterators. ++ { ++ constexpr int N = 5; ++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; ++ Buffer out; ++ Counted::reset(); ++ ++ std::ranges::uninitialized_move_n(in, N, out.cbegin(), out.cend()); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::equal(in, in + N, out.begin(), out.end())); ++ ++ std::destroy(out.begin(), out.end()); ++ } ++ Counted::reset(); ++ + // Conversions. + { + constexpr int N = 3; +diff -ruN --strip-trailing-cr a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp +--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp ++++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp +@@ -282,6 +282,39 @@ + Counted::reset(); + #endif // TEST_HAS_NO_EXCEPTIONS + ++ // Works with const iterators, (iter, sentinel) overload. ++ { ++ constexpr int N = 5; ++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; ++ Buffer out; ++ Counted::reset(); ++ ++ std::ranges::uninitialized_move(in, in + N, out.cbegin(), out.cend()); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::equal(in, in + N, out.begin(), out.end())); ++ ++ std::destroy(out.begin(), out.end()); ++ } ++ Counted::reset(); ++ ++ // Works with const iterators, (range) overload. ++ { ++ constexpr int N = 5; ++ Counted in[N] = {Counted(1), Counted(2), Counted(3), Counted(4), Counted(5)}; ++ Buffer out; ++ Counted::reset(); ++ ++ std::ranges::subrange out_range (out.cbegin(), out.cend()); ++ std::ranges::uninitialized_move(in, out_range); ++ assert(Counted::current_objects == N); ++ assert(Counted::total_objects == N); ++ assert(std::equal(in, in + N, out.begin(), out.end())); ++ ++ std::destroy(out.begin(), out.end()); ++ } ++ Counted::reset(); ++ + // Conversions, (iter, sentinel) overload. + { + constexpr int N = 3; +diff -ruN --strip-trailing-cr a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp ++++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +@@ -10287,10 +10287,8 @@ + SDValue LeftOp = ShiftOperand.getOperand(0); + SDValue RightOp = ShiftOperand.getOperand(1); + +- // Treat zext nneg as sext - we might need to support handling these as zext +- // as well in the future, but for now just prefer sext. +- bool IsSignExt = sd_match(LeftOp, m_SExtLike(m_Value())); +- bool IsZeroExt = sd_match(LeftOp, m_ZExt(m_Value())); ++ bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND; ++ bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND; + + if (!IsSignExt && !IsZeroExt) + return SDValue(); +diff -ruN --strip-trailing-cr a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp ++++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +@@ -9181,12 +9181,13 @@ + for (unsigned Cnt : Slices) { + ArrayRef Slice = VL.slice(Cnt, VF); + // If any instruction is vectorized already - do not try again. +- if (const TreeEntry *SE = getTreeEntry(Slice.front()); ++ if (TreeEntry *SE = getTreeEntry(Slice.front()); + SE || getTreeEntry(Slice.back())) { + if (!SE) + continue; + if (VF != SE->getVectorFactor() || !SE->isSame(Slice)) + continue; ++ SE->UserTreeIndices.emplace_back(&E, UINT_MAX); + AddCombinedNode(SE->Idx, Cnt); + continue; + } +@@ -13396,7 +13397,12 @@ + if (CommonMask[Idx] != PoisonMaskElem) + CommonMask[Idx] = Idx; + for (auto [E, Idx] : SubVectors) { +- Value *V = castToScalarTyElem(E->VectorizedValue); ++ Value *V = E->VectorizedValue; ++ if (V->getType()->isIntOrIntVectorTy()) ++ V = castToScalarTyElem(V, any_of(E->Scalars, [&](Value *V) { ++ return !isKnownNonNegative( ++ V, SimplifyQuery(*R.DL)); ++ })); + Vec = Builder.CreateInsertVector(Vec->getType(), Vec, V, + Builder.getInt64(Idx)); + if (!CommonMask.empty()) { +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/X86/pmulh.ll b/llvm/test/CodeGen/X86/pmulh.ll +--- a/llvm/test/CodeGen/X86/pmulh.ll ++++ b/llvm/test/CodeGen/X86/pmulh.ll +@@ -953,15 +953,39 @@ + ; SSE-NEXT: movdqa %xmm0, 16(%rdi) + ; SSE-NEXT: retq + ; +-; AVX-LABEL: PR109790: +-; AVX: # %bb.0: +-; AVX-NEXT: movq %rdi, %rax +-; AVX-NEXT: vmovdqa (%rsi), %ymm0 +-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +-; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536] +-; AVX-NEXT: vmovdqa %ymm0, (%rdi) +-; AVX-NEXT: vzeroupper +-; AVX-NEXT: retq ++; AVX2-LABEL: PR109790: ++; AVX2: # %bb.0: ++; AVX2-NEXT: movq %rdi, %rax ++; AVX2-NEXT: vmovdqa (%rsi), %ymm0 ++; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ++; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536] ++; AVX2-NEXT: vmovdqa %ymm0, (%rdi) ++; AVX2-NEXT: vzeroupper ++; AVX2-NEXT: retq ++; ++; AVX512F-LABEL: PR109790: ++; AVX512F: # %bb.0: ++; AVX512F-NEXT: movq %rdi, %rax ++; AVX512F-NEXT: vmovdqa (%rsi), %ymm0 ++; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ++; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ++; AVX512F-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 ++; AVX512F-NEXT: vpsrld $16, %zmm0, %zmm0 ++; AVX512F-NEXT: vpmovdw %zmm0, (%rdi) ++; AVX512F-NEXT: vzeroupper ++; AVX512F-NEXT: retq ++; ++; AVX512BW-LABEL: PR109790: ++; AVX512BW: # %bb.0: ++; AVX512BW-NEXT: movq %rdi, %rax ++; AVX512BW-NEXT: vmovdqa (%rsi), %ymm0 ++; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ++; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ++; AVX512BW-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0] ++; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm0 ++; AVX512BW-NEXT: vpmovdw %zmm0, (%rdi) ++; AVX512BW-NEXT: vzeroupper ++; AVX512BW-NEXT: retq + %load = load <16 x i16>, ptr %a, align 32 + %and = and <16 x i16> %load, + %ext = zext nneg <16 x i16> %and to <16 x i32> +diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll b/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll +--- a/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll ++++ b/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll +@@ -0,0 +1,97 @@ ++; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ++; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s ++ ++define i1 @test(i64 %v1, ptr %v2, i32 %v3, i1 %v4) { ++; CHECK-LABEL: define i1 @test( ++; CHECK-SAME: i64 [[V1:%.*]], ptr [[V2:%.*]], i32 [[V3:%.*]], i1 [[V4:%.*]]) { ++; CHECK-NEXT: [[NEWFUNCROOT:.*:]] ++; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0 ++; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <2 x i32> zeroinitializer ++; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], ++; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i8> ++; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i8> [[TMP3]], ++; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i8> [[TMP4]] to <2 x i32> ++; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i32> [[TMP5]], zeroinitializer ++; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[V3]], i32 0 ++; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> ++; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i8> [[TMP4]] to <2 x i32> ++; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP8]], <2 x i32> [[TMP9]], i64 0) ++; CHECK-NEXT: [[TMP11:%.*]] = uitofp <4 x i32> [[TMP10]] to <4 x float> ++; CHECK-NEXT: [[TMP12:%.*]] = fdiv <4 x float> zeroinitializer, [[TMP11]] ++; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i1> poison, i1 [[V4]], i32 0 ++; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i1> [[TMP13]], <4 x i1> poison, <4 x i32> ++; CHECK-NEXT: [[TMP15:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP14]], <2 x i1> [[TMP6]], i64 0) ++; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP15]], <4 x float> zeroinitializer, <4 x float> [[TMP12]] ++; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP16]], i32 3 ++; CHECK-NEXT: [[CONV_I_I1743_3:%.*]] = fptoui float [[TMP17]] to i32 ++; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i32 [[CONV_I_I1743_3]], 0 ++; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x float> [[TMP16]] to <4 x i32> ++; CHECK-NEXT: [[TMP20:%.*]] = icmp ult <4 x i32> [[TMP19]], ++; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP20]], i32 3 ++; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP21]], i1 [[TMP18]], i1 false ++; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP16]], i32 2 ++; CHECK-NEXT: [[CONV_I_I1743_2:%.*]] = fptoui float [[TMP22]] to i32 ++; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP20]], i32 2 ++; CHECK-NEXT: [[NARROW1:%.*]] = select i1 [[TMP23]], i32 [[CONV_I_I1743_2]], i32 0 ++; CHECK-NEXT: [[TMP24:%.*]] = zext i1 [[NARROW]] to i32 ++; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[NARROW1]], [[TMP24]] ++; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x float> [[TMP16]], i32 1 ++; CHECK-NEXT: [[CONV_I_I1743_1:%.*]] = fptoui float [[TMP26]] to i32 ++; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP20]], i32 1 ++; CHECK-NEXT: [[NARROW2:%.*]] = select i1 [[TMP27]], i32 [[CONV_I_I1743_1]], i32 0 ++; CHECK-NEXT: [[RV3:%.*]] = or i32 [[TMP25]], [[NARROW2]] ++; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x float> [[TMP16]], i32 0 ++; CHECK-NEXT: [[CONV_I_I1743:%.*]] = fptoui float [[TMP28]] to i32 ++; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0 ++; CHECK-NEXT: [[NARROW4:%.*]] = select i1 [[TMP29]], i32 [[CONV_I_I1743]], i32 0 ++; CHECK-NEXT: [[RT5:%.*]] = or i32 [[RV3]], [[NARROW4]] ++; CHECK-NEXT: [[RT:%.*]] = zext i32 [[RT5]] to i64 ++; CHECK-NEXT: store i64 [[RT]], ptr [[V2]], align 1 ++; CHECK-NEXT: ret i1 false ++; ++newFuncRoot: ++ %conv.i147.i1756.3 = uitofp i32 %v3 to float ++ %div.i.i.i1749.3 = fdiv float 0.000000e+00, %conv.i147.i1756.3 ++ %cond.i.i.i1751.3 = select i1 %v4, float 0.000000e+00, float %div.i.i.i1749.3 ++ %conv.i147.i1756.2 = uitofp i32 %v3 to float ++ %div.i.i.i1749.2 = fdiv float 0.000000e+00, %conv.i147.i1756.2 ++ %cond.i.i.i1751.2 = select i1 %v4, float 0.000000e+00, float %div.i.i.i1749.2 ++ %0 = lshr i64 %v1, 40 ++ %1 = trunc i64 %0 to i32 ++ %tt2 = and i32 %1, 255 ++ %cmp1.i.i.i1746.1 = icmp eq i32 %tt2, 0 ++ %conv.i147.i1756.1 = uitofp i32 %tt2 to float ++ %div.i.i.i1749.1 = fdiv float 0.000000e+00, %conv.i147.i1756.1 ++ %cond.i.i.i1751.1 = select i1 %cmp1.i.i.i1746.1, float 0.000000e+00, float %div.i.i.i1749.1 ++ %tt3 = lshr i64 %v1, 32 ++ %2 = trunc i64 %tt3 to i32 ++ %tt1 = and i32 %2, 1 ++ %cmp1.i.i.i1746 = icmp eq i32 %tt1, 0 ++ %conv.i147.i1756 = uitofp i32 %tt1 to float ++ %div.i.i.i1749 = fdiv float 0.000000e+00, %conv.i147.i1756 ++ %cond.i.i.i1751 = select i1 %cmp1.i.i.i1746, float 0.000000e+00, float %div.i.i.i1749 ++ %3 = bitcast float %cond.i.i.i1751.3 to i32 ++ %cmp.i99.i1736.3 = icmp ult i32 %3, 1333788672 ++ %conv.i.i1743.3 = fptoui float %cond.i.i.i1751.3 to i32 ++ %4 = icmp ne i32 %conv.i.i1743.3, 0 ++ %narrow = select i1 %cmp.i99.i1736.3, i1 %4, i1 false ++ %5 = bitcast float %cond.i.i.i1751.2 to i32 ++ %cmp.i99.i1736.2 = icmp ult i32 %5, 1333788672 ++ %conv.i.i1743.2 = fptoui float %cond.i.i.i1751.2 to i32 ++ %narrow1 = select i1 %cmp.i99.i1736.2, i32 %conv.i.i1743.2, i32 0 ++ %6 = zext i1 %narrow to i32 ++ %7 = or i32 %narrow1, %6 ++ %8 = bitcast float %cond.i.i.i1751.1 to i32 ++ %cmp.i99.i1736.1 = icmp ult i32 %8, 1333788672 ++ %conv.i.i1743.1 = fptoui float %cond.i.i.i1751.1 to i32 ++ %narrow2 = select i1 %cmp.i99.i1736.1, i32 %conv.i.i1743.1, i32 0 ++ %rv3 = or i32 %7, %narrow2 ++ %9 = bitcast float %cond.i.i.i1751 to i32 ++ %cmp.i99.i1736 = icmp ult i32 %9, 1333788672 ++ %conv.i.i1743 = fptoui float %cond.i.i.i1751 to i32 ++ %narrow4 = select i1 %cmp.i99.i1736, i32 %conv.i.i1743, i32 0 ++ %rt5 = or i32 %rv3, %narrow4 ++ %rt = zext i32 %rt5 to i64 ++ store i64 %rt, ptr %v2, align 1 ++ ret i1 false ++} +diff -ruN --strip-trailing-cr a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +--- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn ++++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +@@ -280,6 +280,7 @@ + "rdpruintrin.h", + "rdseedintrin.h", + "riscv_bitmanip.h", ++ "riscv_corev_alu.h", + "riscv_crypto.h", + "riscv_ntlh.h", + "rtmintrin.h", +diff -ruN --strip-trailing-cr a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn ++++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +@@ -632,6 +632,7 @@ + "__memory/unique_temporary_buffer.h", + "__memory/uses_allocator.h", + "__memory/uses_allocator_construction.h", ++ "__memory/voidify.h", + "__memory_resource/memory_resource.h", + "__memory_resource/monotonic_buffer_resource.h", + "__memory_resource/polymorphic_allocator.h", +diff -ruN --strip-trailing-cr a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel ++++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +@@ -358,6 +358,7 @@ + td_file = "include/clang/Basic/BuiltinsRISCV.td", + td_srcs = [ + "include/clang/Basic/BuiltinsRISCV.td", ++ "include/clang/Basic/BuiltinsRISCVXCV.td", + "include/clang/Basic/BuiltinsBase.td", + ], + ) diff --git a/third_party/tsl/third_party/llvm/workspace.bzl b/third_party/tsl/third_party/llvm/workspace.bzl index ad9923cfc2b03..80f07d34d031d 100644 --- a/third_party/tsl/third_party/llvm/workspace.bzl +++ b/third_party/tsl/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "6292f117c39b9fc72da4e40328eeeda2aa94a5f2" - LLVM_SHA256 = "31f583de2e077f9289fc5efea74bd6e1a1694fda5f77f09472253cdc072f2e5e" + LLVM_COMMIT = "00128a20eec27246719d73ba427bf821883b00b4" + LLVM_SHA256 = "9fff2ccb6c262f3d5e2f98c281a0b99a585daee83742e1599709ff61cfc222af" tf_http_archive( name = name,