From 331bd5dd0e6b113433728de60a62b02aabdee225 Mon Sep 17 00:00:00 2001 From: fineg74 <61437305+fineg74@users.noreply.github.com> Date: Mon, 18 Dec 2023 18:25:10 -0800 Subject: [PATCH] [NFC][ESIMD] Fix incorrect declarations of simd_view based API (#12093) This PR fixes incorrect declarations of API that uses simd_view --- sycl/include/sycl/ext/intel/esimd/memory.hpp | 882 ++---------------- .../ext/intel/experimental/esimd/memory.hpp | 110 ++- sycl/test/esimd/block_load_store.cpp | 21 + 3 files changed, 128 insertions(+), 885 deletions(-) diff --git a/sycl/include/sycl/ext/intel/esimd/memory.hpp b/sycl/include/sycl/ext/intel/esimd/memory.hpp index 19d27a826d2e9..b15091e40a68f 100644 --- a/sycl/include/sycl/ext/intel/esimd/memory.hpp +++ b/sycl/include/sycl/ext/intel/esimd/memory.hpp @@ -121,10 +121,9 @@ __ESIMD_API simd gather(const Tx *p, simd offsets, /// @return A vector of elements read. Elements in masked out lanes are /// undefined. /// -template > +template __ESIMD_API simd gather(const Tx *p, - simd_view offsets, + simd_view offsets, simd_mask mask = 1) { return gather(p, offsets.read(), mask); } @@ -199,9 +198,8 @@ __ESIMD_API void scatter(Tx *p, simd offsets, simd vals, /// @param vals The vector to scatter. /// @param mask The access mask, defaults to all 1s. /// -template > -__ESIMD_API void scatter(Tx *p, simd_view offsets, +template +__ESIMD_API void scatter(Tx *p, simd_view offsets, simd vals, simd_mask mask = 1) { scatter(p, offsets.read(), vals, mask); } @@ -1753,58 +1751,6 @@ block_store(T *ptr, size_t byte_offset, simd vals, simd_mask<1> pred, block_store(AdjustedPtr, vals, pred, props); } -/// The semantics, assumptions and restrictions are identical to -/// (usm-bs-1) defined above. -template , - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - ext::oneapi::experimental::is_property_list_v> -block_store(T *ptr, simd_view vals, - PropertyListT props = {}) { - block_store(ptr, vals.read(), props); -} - -/// The semantics, assumptions and restrictions are identical to -/// (usm-bs-2) defined above. -template , - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - ext::oneapi::experimental::is_property_list_v> -block_store(T *ptr, size_t byte_offset, simd_view vals, - PropertyListT props = {}) { - block_store(ptr, byte_offset, vals.read(), props); -} - -/// The semantics, assumptions and restrictions are identical to -/// (usm-bs-3) defined above. -template , - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - ext::oneapi::experimental::is_property_list_v> -block_store(T *ptr, simd_view vals, simd_mask<1> pred, - PropertyListT props = {}) { - block_store(ptr, vals.read(), pred, props); -} - -/// The semantics, assumptions and restrictions are identical to -/// (usm-bs-4) defined above. -template , - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - ext::oneapi::experimental::is_property_list_v> -block_store(T *ptr, size_t byte_offset, simd_view vals, - simd_mask<1> pred, PropertyListT props = {}) { - block_store(ptr, byte_offset, vals.read(), pred, props); -} - /// Each of the following block_store functions stores the vector 'vals' to a /// contiguous memory block at the address referenced by accessor 'acc', or from /// 'acc + byte_offset', The parameter 'pred' is the one element predicate. If @@ -2083,71 +2029,6 @@ block_store(AccessorT acc, simd vals, simd_mask<1> pred, block_store(acc, 0, vals, pred, Props); } -/// The semantics, assumptions and restrictions are identical to -/// (acc-bs-2) defined above. -template , - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - detail::is_device_accessor_with_v && - !sycl::detail::acc_properties::is_local_accessor_v && - ext::oneapi::experimental::is_property_list_v> -block_store(AccessorT acc, simd_view vals, - PropertyListT props = {}) { - block_store(acc, vals.read(), props); -} - -/// The semantics, assumptions and restrictions are identical to -/// (acc-bs-1) defined above. -template , - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - detail::is_device_accessor_with_v && - !sycl::detail::acc_properties::is_local_accessor_v && - ext::oneapi::experimental::is_property_list_v> -block_store(AccessorT acc, detail::DeviceAccessorOffsetT byte_offset, - simd_view vals, PropertyListT props = {}) { - block_store(acc, byte_offset, vals.read(), props); -} - -/// The semantics, assumptions and restrictions are identical to -/// (acc-bs-4) defined above. -template , - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - detail::is_device_accessor_with_v && - !sycl::detail::acc_properties::is_local_accessor_v && - ext::oneapi::experimental::is_property_list_v> -block_store(AccessorT acc, simd_view vals, simd_mask<1> pred, - PropertyListT props = {}) { - block_store(acc, vals.read(), pred, props); -} - -/// The semantics, assumptions and restrictions are identical to -/// (acc-bs-3) defined above. -template , - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - detail::is_device_accessor_with_v && - !sycl::detail::acc_properties::is_local_accessor_v && - ext::oneapi::experimental::is_property_list_v> -block_store(AccessorT acc, detail::DeviceAccessorOffsetT byte_offset, - simd_view vals, simd_mask<1> pred, - PropertyListT props = {}) { - block_store(acc, byte_offset, vals.read(), pred, props); -} - /// @} sycl_esimd_memory_block /// @} sycl_esimd_memory @@ -2432,10 +2313,9 @@ gather_rgba(const T *p, simd offsets, simd_mask mask = 1) { /// @return Read data - up to N*4 values of type \c Tx. /// template > + int N, typename OffsetObjT, typename RegionTy> __ESIMD_API simd -gather_rgba(const T *p, simd_view offsets, +gather_rgba(const T *p, simd_view offsets, simd_mask mask = 1) { return gather_rgba(p, offsets.read(), mask); } @@ -2526,10 +2406,9 @@ scatter_rgba(T *p, simd offsets, /// undefined. /// template > + int N, typename OffsetObjT, typename RegionTy> __ESIMD_API void -scatter_rgba(T *p, simd_view offsets, +scatter_rgba(T *p, simd_view offsets, simd vals, simd_mask mask = 1) { scatter_rgba(p, offsets.read(), vals, mask); @@ -4351,10 +4230,10 @@ __ESIMD_API /// props = {}); /// (usm-au0-2) /// simd /// -/// atomic_update(T *p, simd_view byte_offset, +/// atomic_update(T *p, simd_view byte_offset, /// simd_mask mask, props = {}); /// (usm-au0-3) /// simd -/// atomic_update(T *p, simd_view byte_offset, +/// atomic_update(T *p, simd_view byte_offset, /// props = {}); /// (usm-au0-4) /// /// Usage of cache hints or non-standard operation width N requires DG2 or PVC. @@ -4465,7 +4344,7 @@ atomic_update(T *p, simd byte_offset, PropertyListT props = {}) { } /// simd -/// atomic_update(T *p, simd_view byte_offset, +/// atomic_update(T *p, simd_view byte_offset, /// simd_mask mask, props = {}); /// (usm-au0-3) /// /// A variation of \c atomic_update API with \c offsets represented as @@ -4484,21 +4363,21 @@ atomic_update(T *p, simd byte_offset, PropertyListT props = {}) { /// Other properties are ignored. /// @return A vector of the old values at the memory locations before the /// update. -template , +template __ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args() == 0 && ext::oneapi::experimental::is_property_list_v, simd> -atomic_update(T *p, simd_view offsets, simd_mask mask, +atomic_update(T *p, simd_view offsets, simd_mask mask, PropertyListT props = {}) { return atomic_update(p, offsets.read(), mask, props); } /// simd -/// atomic_update(T *p, simd_view byte_offset, +/// atomic_update(T *p, simd_view byte_offset, /// props = {}); /// (usm-au0-4) /// /// A variation of \c atomic_update API with \c offsets represented as @@ -4515,15 +4394,15 @@ atomic_update(T *p, simd_view offsets, simd_mask mask, /// Other properties are ignored. /// @return A vector of the old values at the memory locations before the /// update. -template , +template __ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args() == 0 && ext::oneapi::experimental::is_property_list_v, simd> -atomic_update(T *p, simd_view byte_offset, +atomic_update(T *p, simd_view byte_offset, PropertyListT props = {}) { simd_mask mask = 1; return atomic_update(p, byte_offset.read(), mask, props); @@ -4560,31 +4439,13 @@ atomic_update(T *p, Toffset byte_offset, simd_mask mask = 1) { /// simd src0, props = {}); // (usm-au1-2) /// /// simd -/// atomic_update(T *p, simd byte_offset, -/// simd_view src0, -/// simd_mask mask, props = {}); // (usm-au1-3) -/// simd -/// atomic_update(T *p, simd byte_offset, -/// simd_view src0, -/// props = {}); // (usm-au1-4) -/// -/// simd -/// atomic_update(T *p, simd_view byte_offset, +/// atomic_update(T *p, simd_view byte_offset, /// simd src0, -/// simd_mask mask, props = {}); // (usm-au1-5) +/// simd_mask mask, props = {}); // (usm-au1-3) /// simd -/// atomic_update(T *p, simd_view byte_offset, +/// atomic_update(T *p, simd_view byte_offset, /// simd src0, -/// props = {}); // (usm-au1-6) -/// -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd_view src0, -/// simd_mask mask, props = {}); // (usm-au1-7) -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd_view src0, -/// props = {}); // (usm-au1-8) +/// props = {}); // (usm-au1-4) /// /// simd @@ -4708,88 +4569,9 @@ atomic_update(T *p, simd byte_offset, simd src0, } /// simd -/// atomic_update(T *p, simd byte_offset, -/// simd_view src0, -/// simd_mask mask, props = {}); // (usm-au1-3) - -/// A variation of \c atomic_update API with src0 represented as -/// \c simd_view object. - -/// @tparam Op The atomic operation - can be one of the following: -/// \c atomic_op::add, \c atomic_op::sub, \c atomic_op::min, \c atomic_op::max, -/// \c atomic_op::xchg, \c atomic_op::bit_and, \c atomic_op::bit_or, -/// \c atomic_op::bit_xor, \c atomic_op::minsint, \c atomic_op::maxsint, -/// \c atomic_op::fmax, \c atomic_op::fmin, \c atomic_op::fadd, \c -/// atomic_op::fsub, \c atomic_op::store. -/// @tparam T The vector element type. -/// @tparam N The number of memory locations to update. -/// @param p The USM pointer. -/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. -/// @param src0 The additional argument. -/// @param mask Operation mask, only locations with non-zero in the -/// corresponding mask element are updated. -/// @param props The parameter 'props' specifies the optional compile-time -/// properties list. Only L1/L2 properties are used. Other properties are -/// ignored. -/// @return A vector of the old values at the memory locations before the -/// update. -/// -template , - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - __ESIMD_DNS::get_num_args() == 1 && - ext::oneapi::experimental::is_property_list_v, - simd> -atomic_update(T *p, simd byte_offset, simd_view src0, - simd_mask mask, PropertyListT props = {}) { - return atomic_update(p, byte_offset, src0.read(), mask, props); -} - -/// simd -/// atomic_update(T *p, simd byte_offset, -/// simd_view src0, -/// props = {}); // (usm-au1-4) - -/// A variation of \c atomic_update API with src0 represented as -/// \c simd_view object and no mask operand. - -/// @tparam Op The atomic operation - can be one of the following: -/// \c atomic_op::add, \c atomic_op::sub, \c atomic_op::min, \c atomic_op::max, -/// \c atomic_op::xchg, \c atomic_op::bit_and, \c atomic_op::bit_or, -/// \c atomic_op::bit_xor, \c atomic_op::minsint, \c atomic_op::maxsint, -/// \c atomic_op::fmax, \c atomic_op::fmin, \c atomic_op::fadd, \c -/// atomic_op::fsub, \c atomic_op::store. -/// @tparam T The vector element type. -/// @tparam N The number of memory locations to update. -/// @param p The USM pointer. -/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. -/// @param src0 The additional argument. -/// @param props The parameter 'props' specifies the optional compile-time -/// properties list. Only L1/L2 properties are used. Other properties are -/// ignored. -/// @return A vector of the old values at the memory locations before the -/// update. -/// -template , - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - __ESIMD_DNS::get_num_args() == 1 && - ext::oneapi::experimental::is_property_list_v, - simd> -atomic_update(T *p, simd byte_offset, simd_view src0, - PropertyListT props = {}) { - simd_mask mask = 1; - return atomic_update(p, byte_offset, src0.read(), mask, props); -} - -/// simd -/// atomic_update(T *p, simd_view byte_offset, +/// atomic_update(T *p, simd_view byte_offset, /// simd src0, -/// simd_mask mask, props = {}); // (usm-au1-5) +/// simd_mask mask, props = {}); // (usm-au1-3) /// /// A variation of \c atomic_update API with \c byte_offset represented as /// \c simd_view object. @@ -4813,23 +4595,23 @@ atomic_update(T *p, simd byte_offset, simd_view src0, /// @return A vector of the old values at the memory locations before the /// update. /// -template , +template __ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args() == 1 && ext::oneapi::experimental::is_property_list_v, simd> -atomic_update(T *p, simd_view offsets, simd src0, +atomic_update(T *p, simd_view offsets, simd src0, simd_mask mask, PropertyListT props = {}) { return atomic_update(p, offsets.read(), src0, mask, props); } /// simd -/// atomic_update(T *p, simd_view byte_offset, +/// atomic_update(T *p, simd_view byte_offset, /// simd src0, -/// props = {}); // (usm-au1-6) +/// props = {}); // (usm-au1-4) /// /// A variation of \c atomic_update API with \c byte_offset represented as /// \c simd_view object and no mask operand. @@ -4851,102 +4633,20 @@ atomic_update(T *p, simd_view offsets, simd src0, /// @return A vector of the old values at the memory locations before the /// update. /// -template , +template __ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args() == 1 && ext::oneapi::experimental::is_property_list_v, simd> -atomic_update(T *p, simd_view offsets, simd src0, +atomic_update(T *p, simd_view offsets, simd src0, PropertyListT props = {}) { simd_mask mask = 1; return atomic_update(p, offsets.read(), src0, mask, props); } -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd_view src0, -/// simd_mask mask, props = {}); // (usm-au1-7) -/// -/// A variation of \c atomic_update API with byte_offset and src0 represented as -/// \c simd_view objects. -/// -/// @tparam Op The atomic operation - can be one of the following: -/// \c atomic_op::add, \c atomic_op::sub, \c atomic_op::min, \c -/// atomic_op::max, \c atomic_op::xchg, \c atomic_op::bit_and, \c -/// atomic_op::bit_or, \c atomic_op::bit_xor, \c atomic_op::minsint, \c -/// atomic_op::maxsint, \c atomic_op::fmax, \c atomic_op::fmin, \c -/// atomic_op::fadd, \c atomic_op::fsub, \c atomic_op::store. -/// @tparam T The vector element type. -/// @tparam N The number of memory locations to update. -/// @param p The USM pointer. -/// @param byte_offset The simd_view of 32-bit or 64-bit offsets in bytes. -/// @param src0 The additional argument. -/// @param mask Operation mask, only locations with non-zero in the -/// corresponding mask element are updated. -/// @param props The parameter 'props' specifies the optional compile-time -/// properties list. Only L1/L2 properties are used. Other properties are -/// ignored. -/// @return A vector of the old values at the memory locations before the -/// update. -/// -template , - typename RegionTy = region1d_t, - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - __ESIMD_DNS::get_num_args() == 1 && - ext::oneapi::experimental::is_property_list_v, - simd> -atomic_update(T *p, simd_view offsets, - simd_view src0, simd_mask mask, - PropertyListT props = {}) { - return atomic_update(p, offsets.read(), src0.read(), mask, props); -} - -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd_view src0, -/// props = {}); // (usm-au1-8) -/// -/// A variation of \c atomic_update API with byte_offset and src0 represented as -/// \c simd_view objects and no mask operand. -/// -/// @tparam Op The atomic operation - can be one of the following: -/// \c atomic_op::add, \c atomic_op::sub, \c atomic_op::min, \c -/// atomic_op::max, \c atomic_op::xchg, \c atomic_op::bit_and, \c -/// atomic_op::bit_or, \c atomic_op::bit_xor, \c atomic_op::minsint, \c -/// atomic_op::maxsint, \c atomic_op::fmax, \c atomic_op::fmin, \c -/// atomic_op::fadd, \c atomic_op::fsub, \c atomic_op::store. -/// @tparam T The vector element type. -/// @tparam N The number of memory locations to update. -/// @param p The USM pointer. -/// @param byte_offset The simd_view of 32-bit or 64-bit offsets in bytes. -/// @param src0 The additional argument. -/// @param props The parameter 'props' specifies the optional compile-time -/// properties list. Only L1/L2 properties are used. Other properties are -/// ignored. -/// @return A vector of the old values at the memory locations before the -/// update. -/// -template , - typename RegionTy = region1d_t, - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - __ESIMD_DNS::get_num_args() == 1 && - ext::oneapi::experimental::is_property_list_v, - simd> -atomic_update(T *p, simd_view offsets, - simd_view src0, PropertyListT props = {}) { - simd_mask mask = 1; - return atomic_update(p, offsets.read(), src0.read(), mask, props); -} - /// A variation of \c atomic_update API with \c offset represented as /// scalar object. /// @@ -4990,67 +4690,13 @@ atomic_update(Tx *p, Toffset byte_offset, simd src0, simd_mask mask) { /// props = {}); // (usm-au2-2) /// /// simd -/// atomic_update(T *p, simd byte_offset, -/// simd src0, simd_view src1, -/// simd_mask mask, props = {}) // (usm-au2-3) -/// simd -/// atomic_update(T *p, simd byte_offset, -/// simd src0, simd_view src1, -/// props = {}) // (usm-au2-4) -/// -/// simd -/// atomic_update(T *p, simd byte_offset, -/// simd_view src0, simd src1, -/// simd_mask mask, props = {}) // (usm-au2-5) -/// simd -/// atomic_update(T *p, simd byte_offset, -/// simd_view src0, simd src1, -/// props = {}) // (usm-au2-6) -/// -/// simd -/// atomic_update(T *p, simd byte_offset, -/// simd_view src0, simd_view src1, -/// simd_mask mask, props = {}) // (usm-au2-7) -/// simd -/// atomic_update(T *p, simd byte_offset, -/// simd_view src0, simd_view src1, -/// props = {}) // (usm-au2-8) -/// -/// simd -/// atomic_update(T *p, simd_view byte_offset, +/// atomic_update(T *p, simd_view byte_offset, /// simd src0, simd src1, -/// simd_mask mask, props = {}) // (usm-au2-9) +/// simd_mask mask, props = {}) // (usm-au2-3) /// simd -/// atomic_update(T *p, simd_view byte_offset, +/// atomic_update(T *p, simd_view byte_offset, /// simd src0, simd src1, -/// props = {}) // (usm-au2-10) -/// -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd src0, simd_view src1, -/// simd_mask mask, props = {}) // (usm-au2-11) -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd src0, simd_view src1, -/// props = {}) // (usm-au2-12) -/// -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd_view src0, simd src1, -/// simd_mask mask, props = {}) // (usm-au2-13) -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd_view src0, simd src1, -/// props = {}) // (usm-au2-14) -/// -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd_view src0, simd_view src1, -/// simd_mask mask, props = {}) // (usm-au2-15) -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd_view src0, simd_view src1, -/// props = {}) // (usm-au2-16) +/// props = {}) // (usm-au2-4) /// /// simd @@ -5151,8 +4797,8 @@ atomic_update(T *p, simd byte_offset, simd src0, } /// simd -/// atomic_update(T *p, simd byte_offset, -/// simd src0, simd_view src1, +/// atomic_update(T *p, simd_view byte_offset, +/// simd src0, simd src1, /// simd_mask mask, props = {}) // (usm-au2-3) /// /// @tparam Op The atomic operation - can be one of the following: @@ -5170,24 +4816,24 @@ atomic_update(T *p, simd byte_offset, simd src0, // Other properties are ignored. /// @return A vector of the old values at the memory locations before the /// update. -template , +template __ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args() == 2 && ext::oneapi::experimental::is_property_list_v, simd> -atomic_update(T *p, simd byte_offset, simd src0, - simd_view src1, simd_mask mask, +atomic_update(T *p, simd_view byte_offset, + simd src0, simd src1, simd_mask mask, PropertyListT props = {}) { - return atomic_update(p, byte_offset, src0, src1.read(), mask, + return atomic_update(p, byte_offset.read(), src0, src1, mask, props); } /// simd -/// atomic_update(T *p, simd byte_offset, -/// simd src0, simd_view src1, +/// atomic_update(T *p, simd_view byte_offset, +/// simd src0, simd src1, /// props = {}) // (usm-au2-4) /// /// @tparam Op The atomic operation - can be one of the following: @@ -5203,437 +4849,21 @@ atomic_update(T *p, simd byte_offset, simd src0, // Other properties are ignored. /// @return A vector of the old values at the memory locations before the /// update. -template , - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - __ESIMD_DNS::get_num_args() == 2 && - ext::oneapi::experimental::is_property_list_v, - simd> -atomic_update(T *p, simd byte_offset, simd src0, - simd_view src1, PropertyListT props = {}) { - simd_mask mask = 1; - return atomic_update(p, byte_offset, src0, src1.read(), mask, - props); -} - -/// simd -/// atomic_update(T *p, simd byte_offset, -/// simd_view src0, simd src1, -/// simd_mask mask, props = {}) // (usm-au2-5) -/// -/// -/// @tparam Op The atomic operation - can be one of the following: -/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. -/// @tparam T The vector element type. -/// @tparam N The number of memory locations to update. -/// @param p The USM pointer. -/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. -/// @param src0 The first additional argument (new value). -/// @param src1 The second additional argument (expected value). -/// @param mask Operation mask, only locations with non-zero in the -/// corresponding mask element are updated. -/// @param props The parameter 'props' specifies the optional compile-time -/// properties list. Only L1/L2 properties are used. -// Other properties are ignored. -/// @return A vector of the old values at the memory locations before the -/// update. -template , - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - __ESIMD_DNS::get_num_args() == 2 && - ext::oneapi::experimental::is_property_list_v, - simd> -atomic_update(T *p, simd byte_offset, simd_view src0, - simd src1, simd_mask mask, PropertyListT props = {}) { - return atomic_update(p, byte_offset, src0.read(), src1, mask, - props); -} - -/// simd -/// atomic_update(T *p, simd byte_offset, -/// simd_view src0, simd src1, -/// props = {}) // (usm-au2-6) -/// -/// @tparam Op The atomic operation - can be one of the following: -/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. -/// @tparam T The vector element type. -/// @tparam N The number of memory locations to update. -/// @param p The USM pointer. -/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. -/// @param src0 The first additional argument (new value). -/// @param src1 The second additional argument (expected value). -/// @param props The parameter 'props' specifies the optional compile-time -/// properties list. Only L1/L2 properties are used. -// Other properties are ignored. -/// @return A vector of the old values at the memory locations before the -/// update. -template , - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - __ESIMD_DNS::get_num_args() == 2 && - ext::oneapi::experimental::is_property_list_v, - simd> -atomic_update(T *p, simd byte_offset, simd_view src0, - simd src1, PropertyListT props = {}) { - simd_mask mask = 1; - return atomic_update(p, byte_offset, src0.read(), src1, mask, - props); -} - -/// simd -/// atomic_update(T *p, simd byte_offset, -/// simd_view src0, simd_view src1, -/// simd_mask mask, props = {}) // (usm-au2-7) -/// -/// @tparam Op The atomic operation - can be one of the following: -/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. -/// @tparam T The vector element type. -/// @tparam N The number of memory locations to update. -/// @param p The USM pointer. -/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. -/// @param src0 The first additional argument (new value). -/// @param src1 The second additional argument (expected value). -/// @param mask Operation mask, only locations with non-zero in the -/// corresponding mask element are updated. -/// @param props The parameter 'props' specifies the optional compile-time -/// properties list. Only L1/L2 properties are used. -// Other properties are ignored. -/// @return A vector of the old values at the memory locations before the -/// update. -template , - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - __ESIMD_DNS::get_num_args() == 2 && - ext::oneapi::experimental::is_property_list_v, - simd> -atomic_update(T *p, simd byte_offset, simd_view src0, - simd_view src1, simd_mask mask, - PropertyListT props = {}) { - return atomic_update(p, byte_offset, src0.read(), src1.read(), mask, - props); -} - -/// simd -/// atomic_update(T *p, simd byte_offset, -/// simd_view src0, simd_view src1, -/// props = {}) // (usm-au2-8) -/// -/// @tparam Op The atomic operation - can be one of the following: -/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. -/// @tparam T The vector element type. -/// @tparam N The number of memory locations to update. -/// @param p The USM pointer. -/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. -/// @param src0 The first additional argument (new value). -/// @param src1 The second additional argument (expected value). -/// @param props The parameter 'props' specifies the optional compile-time -/// properties list. Only L1/L2 properties are used. -// Other properties are ignored. -/// @return A vector of the old values at the memory locations before the -/// update. -template , - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - __ESIMD_DNS::get_num_args() == 2 && - ext::oneapi::experimental::is_property_list_v, - simd> -atomic_update(T *p, simd byte_offset, simd_view src0, - simd_view src1, PropertyListT props = {}) { - simd_mask mask = 1; - return atomic_update(p, byte_offset, src0.read(), src1.read(), mask, - props); -} - -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd src0, simd src1, -/// simd_mask mask, props = {}) // (usm-au2-9) -/// -/// @tparam Op The atomic operation - can be one of the following: -/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. -/// @tparam T The vector element type. -/// @tparam N The number of memory locations to update. -/// @param p The USM pointer. -/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. -/// @param src0 The first additional argument (new value). -/// @param src1 The second additional argument (expected value). -/// @param mask Operation mask, only locations with non-zero in the -/// corresponding mask element are updated. -/// @param props The parameter 'props' specifies the optional compile-time -/// properties list. Only L1/L2 properties are used. -// Other properties are ignored. -/// @return A vector of the old values at the memory locations before the -/// update. -template , - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - __ESIMD_DNS::get_num_args() == 2 && - ext::oneapi::experimental::is_property_list_v, - simd> -atomic_update(T *p, simd_view byte_offset, - simd src0, simd src1, simd_mask mask, - PropertyListT props = {}) { - return atomic_update(p, byte_offset.read(), src0, src1, mask, - props); -} - -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd src0, simd src1, -/// props = {}) // (usm-au2-10) -/// -/// @tparam Op The atomic operation - can be one of the following: -/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. -/// @tparam T The vector element type. -/// @tparam N The number of memory locations to update. -/// @param p The USM pointer. -/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. -/// @param src0 The first additional argument (new value). -/// @param src1 The second additional argument (expected value). -/// @param props The parameter 'props' specifies the optional compile-time -/// properties list. Only L1/L2 properties are used. -// Other properties are ignored. -/// @return A vector of the old values at the memory locations before the -/// update. -template , +template __ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args() == 2 && ext::oneapi::experimental::is_property_list_v, simd> -atomic_update(T *p, simd_view byte_offset, +atomic_update(T *p, simd_view byte_offset, simd src0, simd src1, PropertyListT props = {}) { simd_mask mask = 1; return atomic_update(p, byte_offset.read(), src0, src1, mask, props); } -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd src0, simd_view src1, -/// simd_mask mask, props = {}) // (usm-au2-11) -/// -/// @tparam Op The atomic operation - can be one of the following: -/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. -/// @tparam T The vector element type. -/// @tparam N The number of memory locations to update. -/// @param p The USM pointer. -/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. -/// @param src0 The first additional argument (new value). -/// @param src1 The second additional argument (expected value). -/// @param mask Operation mask, only locations with non-zero in the -/// corresponding mask element are updated. -/// @param props The parameter 'props' specifies the optional compile-time -/// properties list. Only L1/L2 properties are used. -// Other properties are ignored. -/// @return A vector of the old values at the memory locations before the -/// update. -template , - typename RegionTy = region1d_t, - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - __ESIMD_DNS::get_num_args() == 2 && - ext::oneapi::experimental::is_property_list_v, - simd> -atomic_update(T *p, simd_view byte_offset, - simd src0, simd_view src1, simd_mask mask, - PropertyListT props = {}) { - return atomic_update(p, byte_offset.read(), src0, src1.read(), mask, - props); -} - -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd src0, simd_view src1, -/// props = {}) // (usm-au2-12) -/// @tparam Op The atomic operation - can be one of the following: -/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. -/// @tparam T The vector element type. -/// @tparam N The number of memory locations to update. -/// @param p The USM pointer. -/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. -/// @param src0 The first additional argument (new value). -/// @param src1 The second additional argument (expected value). -/// @param props The parameter 'props' specifies the optional compile-time -/// properties list. Only L1/L2 properties are used. -// Other properties are ignored. -/// @return A vector of the old values at the memory locations before the -/// update. -template , - typename RegionTy = region1d_t, - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - __ESIMD_DNS::get_num_args() == 2 && - ext::oneapi::experimental::is_property_list_v, - simd> -atomic_update(T *p, simd_view byte_offset, - simd src0, simd_view src1, - PropertyListT props = {}) { - simd_mask mask = 1; - return atomic_update(p, byte_offset.read(), src0, src1.read(), mask, - props); -} - -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd_view src0, simd src1, -/// simd_mask mask, props = {}) // (usm-au2-13) -/// -/// @tparam Op The atomic operation - can be one of the following: -/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. -/// @tparam T The vector element type. -/// @tparam N The number of memory locations to update. -/// @param p The USM pointer. -/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. -/// @param src0 The first additional argument (new value). -/// @param src1 The second additional argument (expected value). -/// @param mask Operation mask, only locations with non-zero in the -/// corresponding mask element are updated. -/// @param props The parameter 'props' specifies the optional compile-time -/// properties list. Only L1/L2 properties are used. -// Other properties are ignored. -/// @return A vector of the old values at the memory locations before the -/// update. -template , - typename RegionTy = region1d_t, - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - __ESIMD_DNS::get_num_args() == 2 && - ext::oneapi::experimental::is_property_list_v, - simd> -atomic_update(T *p, simd_view byte_offset, - simd_view src0, simd src1, simd_mask mask, - PropertyListT props = {}) { - return atomic_update(p, byte_offset.read(), src0.read(), src1, mask, - props); -} - -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd_view src0, simd src1, -/// props = {}) // (usm-au2-14) -/// -/// @tparam Op The atomic operation - can be one of the following: -/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. -/// @tparam T The vector element type. -/// @tparam N The number of memory locations to update. -/// @param p The USM pointer. -/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. -/// @param src0 The first additional argument (new value). -/// @param src1 The second additional argument (expected value). -/// @param props The parameter 'props' specifies the optional compile-time -/// properties list. Only L1/L2 properties are used. -// Other properties are ignored. -/// @return A vector of the old values at the memory locations before the -/// update. -template , - typename RegionTy = region1d_t, - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - __ESIMD_DNS::get_num_args() == 2 && - ext::oneapi::experimental::is_property_list_v, - simd> -atomic_update(T *p, simd_view byte_offset, - simd_view src0, simd src1, - PropertyListT props = {}) { - simd_mask mask = 1; - return atomic_update(p, byte_offset.read(), src0.read(), src1, mask, - props); -} - -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd_view src0, simd_view src1, -/// simd_mask mask, props = {}) // (usm-au2-15) -/// -/// @tparam Op The atomic operation - can be one of the following: -/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. -/// @tparam T The vector element type. -/// @tparam N The number of memory locations to update. -/// @param p The USM pointer. -/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. -/// @param src0 The first additional argument (new value). -/// @param src1 The second additional argument (expected value). -/// @param mask Operation mask, only locations with non-zero in the -/// corresponding mask element are updated. -/// @param props The parameter 'props' specifies the optional compile-time -/// properties list. Only L1/L2 properties are used. -// Other properties are ignored. -/// @return A vector of the old values at the memory locations before the -/// update. -template , - typename RegionTy = region1d_t, - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - __ESIMD_DNS::get_num_args() == 2 && - ext::oneapi::experimental::is_property_list_v, - simd> -atomic_update(T *p, simd_view byte_offset, - simd_view src0, simd_view src1, - simd_mask mask, PropertyListT props = {}) { - return atomic_update(p, byte_offset.read(), src0.read(), - src1.read(), mask, props); -} - -/// simd -/// atomic_update(T *p, simd_view byte_offset, -/// simd_view src0, simd_view src1, -/// props = {}) // (usm-au2-16) -/// -/// @tparam Op The atomic operation - can be one of the following: -/// \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg. -/// @tparam T The vector element type. -/// @tparam N The number of memory locations to update. -/// @param p The USM pointer. -/// @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. -/// @param src0 The first additional argument (new value). -/// @param src1 The second additional argument (expected value). -/// @param props The parameter 'props' specifies the optional compile-time -/// properties list. Only L1/L2 properties are used. -// Other properties are ignored. -/// @return A vector of the old values at the memory locations before the -/// update. -template , - typename RegionTy = region1d_t, - typename PropertyListT = - ext::oneapi::experimental::detail::empty_properties_t> -__ESIMD_API std::enable_if_t< - __ESIMD_DNS::get_num_args() == 2 && - ext::oneapi::experimental::is_property_list_v, - simd> -atomic_update(T *p, simd_view byte_offset, - simd_view src0, simd_view src1, - PropertyListT props = {}) { - simd_mask mask = 1; - return atomic_update(p, byte_offset.read(), src0.read(), - src1.read(), mask, props); -} - /// A variation of \c atomic_update API with \c byte_offset represented as /// scalar. /// @@ -7084,8 +6314,6 @@ ESIMD_INLINE void simd_obj_impl::copy_to_impl( constexpr unsigned NumBlocks = Size / BlockSize; constexpr unsigned RemSize = Size % BlockSize; - using OffsetTy = decltype(offset); - simd Tmp{data()}; if constexpr (Align >= OperandSize::OWORD && Size % OperandSize::OWORD == 0 && detail::isPowerOf2(RemSize / OperandSize::OWORD)) { @@ -7110,7 +6338,7 @@ ESIMD_INLINE void simd_obj_impl::copy_to_impl( } else { constexpr unsigned NumChunks = N / ChunkSize; if constexpr (NumChunks > 0) { - simd Offsets(0u, sizeof(T)); + simd Offsets(0u, sizeof(T)); ForHelper::unroll([acc, offset, &Offsets, &Tmp](unsigned Block) { scatter( @@ -7121,7 +6349,7 @@ ESIMD_INLINE void simd_obj_impl::copy_to_impl( constexpr unsigned RemN = N % ChunkSize; if constexpr (RemN > 0) { if constexpr (RemN == 1 || RemN == 8 || RemN == 16) { - simd Offsets(0u, sizeof(T)); + simd Offsets(0u, sizeof(T)); scatter( acc, Offsets, Tmp.template select(NumChunks * ChunkSize), offset + (NumChunks * ChunkSize * sizeof(T))); @@ -7132,7 +6360,7 @@ ESIMD_INLINE void simd_obj_impl::copy_to_impl( simd Vals; Vals.template select() = Tmp.template select(NumChunks * ChunkSize); - simd Offsets(0u, sizeof(T)); + simd Offsets(0u, sizeof(T)); scatter(acc, Offsets, Vals, offset + (NumChunks * ChunkSize * sizeof(T)), Pred); @@ -7154,8 +6382,6 @@ ESIMD_INLINE void simd_obj_impl::copy_from_impl( constexpr unsigned NumBlocks = Size / BlockSize; constexpr unsigned RemSize = Size % BlockSize; - using OffsetTy = decltype(offset); - if constexpr (Align >= OperandSize::DWORD && Size % OperandSize::OWORD == 0 && detail::isPowerOf2(RemSize / OperandSize::OWORD)) { if constexpr (NumBlocks > 0) { @@ -7179,7 +6405,7 @@ ESIMD_INLINE void simd_obj_impl::copy_from_impl( } else { constexpr unsigned NumChunks = N / ChunkSize; if constexpr (NumChunks > 0) { - simd Offsets(0u, sizeof(T)); + simd Offsets(0u, sizeof(T)); ForHelper::unroll( [acc, offset, &Offsets, this](unsigned Block) { select(Block * ChunkSize) = @@ -7190,14 +6416,14 @@ ESIMD_INLINE void simd_obj_impl::copy_from_impl( constexpr unsigned RemN = N % ChunkSize; if constexpr (RemN > 0) { if constexpr (RemN == 1 || RemN == 8 || RemN == 16) { - simd Offsets(0u, sizeof(T)); + simd Offsets(0u, sizeof(T)); select(NumChunks * ChunkSize) = gather( acc, Offsets, offset + (NumChunks * ChunkSize * sizeof(T))); } else { constexpr int N1 = RemN < 8 ? 8 : RemN < 16 ? 16 : 32; simd_mask_type Pred(0); Pred.template select() = 1; - simd Offsets(0u, sizeof(T)); + simd Offsets(0u, sizeof(T)); simd Vals = gather( acc, Offsets, offset + (NumChunks * ChunkSize * sizeof(T)), Pred); select(NumChunks * ChunkSize) = diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp index e4cfca89f2953..15a1f17abf1f0 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp @@ -750,22 +750,22 @@ lsc_gather(const T *p, __ESIMD_NS::simd offsets, return detail::lsc_format_ret(Result); } -template < - typename T, int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, - cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, - typename Toffset, typename RegionTy = __ESIMD_NS::region1d_t> +template __ESIMD_API __ESIMD_NS::simd -lsc_gather(const T *p, __ESIMD_NS::simd_view offsets, +lsc_gather(const T *p, __ESIMD_NS::simd_view offsets, __ESIMD_NS::simd_mask pred = 1) { return lsc_gather(p, offsets.read(), pred); } -template < - typename T, int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, - cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, - typename Toffset, typename RegionTy = __ESIMD_NS::region1d_t> +template __ESIMD_API __ESIMD_NS::simd -lsc_gather(const T *p, __ESIMD_NS::simd_view offsets, +lsc_gather(const T *p, __ESIMD_NS::simd_view offsets, __ESIMD_NS::simd_mask pred, __ESIMD_NS::simd pass_thru) { return lsc_gather(p, offsets.read(), pred, @@ -1316,13 +1316,13 @@ __ESIMD_API void lsc_prefetch(const T *p, __ESIMD_NS::simd offsets, addrs.data()); } -template < - typename T, int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, - cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, - typename Toffset, typename RegionTy = __ESIMD_NS::region1d_t> -__ESIMD_API void lsc_prefetch(const T *p, - __ESIMD_NS::simd_view offsets, - __ESIMD_NS::simd_mask pred = 1) { +template +__ESIMD_API void +lsc_prefetch(const T *p, __ESIMD_NS::simd_view offsets, + __ESIMD_NS::simd_mask pred = 1) { lsc_prefetch(p, offsets.read(), pred); } @@ -1592,14 +1592,14 @@ __ESIMD_API void lsc_scatter(T *p, __ESIMD_NS::simd offsets, Tmp.data()); } -template < - typename T, int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, - cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, - typename Toffset, typename RegionTy = __ESIMD_NS::region1d_t> -__ESIMD_API void lsc_scatter(T *p, - __ESIMD_NS::simd_view offsets, - __ESIMD_NS::simd vals, - __ESIMD_NS::simd_mask pred = 1) { +template +__ESIMD_API void +lsc_scatter(T *p, __ESIMD_NS::simd_view offsets, + __ESIMD_NS::simd vals, + __ESIMD_NS::simd_mask pred = 1) { lsc_scatter(p, offsets.read(), vals, pred); } @@ -2707,11 +2707,10 @@ lsc_atomic_update(T *p, __ESIMD_NS::simd offsets, template <__ESIMD_NS::atomic_op Op, typename T, int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, - typename Toffset, - typename RegionTy = __ESIMD_NS::region1d_t> + typename OffsetObjT, typename RegionTy> __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args() == 1, __ESIMD_NS::simd> -lsc_atomic_update(T *p, __ESIMD_NS::simd_view offsets, +lsc_atomic_update(T *p, __ESIMD_NS::simd_view offsets, __ESIMD_NS::simd src0, __ESIMD_NS::simd_mask pred = 1) { return lsc_atomic_update(p, offsets.read(), src0, @@ -2766,11 +2765,10 @@ lsc_atomic_update(T *p, __ESIMD_NS::simd offsets, template <__ESIMD_NS::atomic_op Op, typename T, int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, - typename Toffset, - typename RegionTy = __ESIMD_NS::region1d_t> + typename OffsetObjT, typename RegionTy> __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args() == 2, __ESIMD_NS::simd> -lsc_atomic_update(T *p, __ESIMD_NS::simd_view offsets, +lsc_atomic_update(T *p, __ESIMD_NS::simd_view offsets, __ESIMD_NS::simd src0, __ESIMD_NS::simd src1, __ESIMD_NS::simd_mask pred = 1) { return lsc_atomic_update(p, offsets.read(), src0, @@ -3065,10 +3063,10 @@ atomic_update(T *p, simd offset, simd_mask mask) { p, offset, mask); } -template > +template __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args() == 0, simd> -atomic_update(T *p, simd_view offsets, +atomic_update(T *p, simd_view offsets, simd_mask mask = 1) { return __ESIMD_ENS::lsc_atomic_update(), T, N>( p, offsets, mask); @@ -3094,11 +3092,12 @@ atomic_update(T *p, simd offset, simd src0, p, offset, src0, mask); } -template > -__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args() == 1, simd> -atomic_update(T *p, simd_view offsets, simd src0, - simd_mask mask = 1) { +template +__ESIMD_API __ESIMD_API + std::enable_if_t<__ESIMD_DNS::get_num_args() == 1, simd> + atomic_update(T *p, simd_view offsets, + simd src0, simd_mask mask = 1) { return __ESIMD_ENS::lsc_atomic_update(), T, N>( p, offsets, src0, mask); } @@ -3126,10 +3125,10 @@ atomic_update(T *p, simd offset, simd src0, simd src1, p, offset, src1, src0, mask); } -template > +template __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args() == 2, simd> -atomic_update(T *p, simd_view offsets, simd src0, +atomic_update(T *p, simd_view offsets, simd src0, simd src1, simd_mask mask = 1) { return __ESIMD_ENS::lsc_atomic_update(), T, N>( p, offsets, src1, src0, mask); @@ -3156,13 +3155,12 @@ atomic_update(AccessorTy acc, simd offset, simd_mask mask) { acc, offset, mask); } -template , - typename AccessorTy> +template __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args() == 0 && !std::is_pointer_v, simd> -atomic_update(AccessorTy acc, simd_view offsets, +atomic_update(AccessorTy acc, simd_view offsets, simd_mask mask) { return __ESIMD_ENS::lsc_atomic_update(), T, N>( acc, offsets, mask); @@ -3192,13 +3190,12 @@ atomic_update(AccessorTy acc, simd offset, simd src0, acc, offset, src0, mask); } -template , - typename AccessorTy> -__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args() == 1 && - !std::is_pointer_v, - simd> -atomic_update(AccessorTy acc, simd_view offsets, +template +__ESIMD_API __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args() == 1 && + !std::is_pointer_v, + simd> +atomic_update(AccessorTy acc, simd_view offsets, simd src0, simd_mask mask) { return __ESIMD_ENS::lsc_atomic_update(), T, N>( acc, offsets, src0, mask); @@ -3232,13 +3229,12 @@ atomic_update(AccessorTy acc, simd offset, simd src0, acc, offset, src1, src0, mask); } -template , - typename AccessorTy> +template __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args() == 2 && !std::is_pointer_v, simd> -atomic_update(AccessorTy acc, simd_view offsets, +atomic_update(AccessorTy acc, simd_view offsets, simd src0, simd src1, simd_mask mask) { return __ESIMD_ENS::lsc_atomic_update(), T, N>( acc, offsets, src1, src0, mask); diff --git a/sycl/test/esimd/block_load_store.cpp b/sycl/test/esimd/block_load_store.cpp index 95f5588a9f231..dfacdf9f2ba0a 100644 --- a/sycl/test/esimd/block_load_store.cpp +++ b/sycl/test/esimd/block_load_store.cpp @@ -69,3 +69,24 @@ kernel7(local_accessor &buf) SYCL_ESIMD_FUNCTION { // function for call to 'block_store' block_store(buf, 0, v); } + +// - Positive cases +SYCL_EXTERNAL void +kernel7(accessor &buf) + SYCL_ESIMD_FUNCTION { + simd v1(0, 1); + auto vals_view = v1.select<8, 1>(0); + block_store(buf, 0, vals_view); +} + +SYCL_EXTERNAL void kernel8(local_accessor &buf) SYCL_ESIMD_FUNCTION { + simd v1(0, 1); + auto vals_view = v1.select<8, 1>(0); + block_store(buf, 0, vals_view); +} + +SYCL_EXTERNAL void kernel9(int *ptr) SYCL_ESIMD_FUNCTION { + simd v1(0, 1); + auto vals_view = v1.select<8, 1>(0); + block_store(ptr, vals_view); +} \ No newline at end of file