diff --git a/sycl/doc/design/CommandGraph.md b/sycl/doc/design/CommandGraph.md index f36c40af07403..2c8d21d7dc4d8 100644 --- a/sycl/doc/design/CommandGraph.md +++ b/sycl/doc/design/CommandGraph.md @@ -537,6 +537,8 @@ The types of commands which are unsupported, and lead to this exception are: This corresponds to a memory buffer write command. * `handler::copy(src, dest)` or `handler::memcpy(dest, src)` - Where both `src` and `dest` are USM pointers. This corresponds to a USM copy command. +* `handler::fill(ptr, pattern, count)` - This corresponds to a USM memory + fill command. * `handler::memset(ptr, value, numBytes)` - This corresponds to a USM memory fill command. * `handler::prefetch()`. diff --git a/sycl/include/sycl/detail/cg.hpp b/sycl/include/sycl/detail/cg.hpp index 8d823c109ee34..c1fdc6857b4a0 100644 --- a/sycl/include/sycl/detail/cg.hpp +++ b/sycl/include/sycl/detail/cg.hpp @@ -147,7 +147,7 @@ class CG { getAuxiliaryResources() const { return {}; } - virtual void clearAuxiliaryResources(){}; + virtual void clearAuxiliaryResources() {}; virtual ~CG() = default; @@ -247,11 +247,11 @@ class CGCopy : public CG { /// "Fill memory" command group class. class CGFill : public CG { public: - std::vector MPattern; + std::vector MPattern; AccessorImplHost *MPtr; - CGFill(std::vector Pattern, void *Ptr, CG::StorageInitHelper CGData, - detail::code_location loc = {}) + CGFill(std::vector Pattern, void *Ptr, + CG::StorageInitHelper CGData, detail::code_location loc = {}) : CG(Fill, std::move(CGData), std::move(loc)), MPattern(std::move(Pattern)), MPtr((AccessorImplHost *)Ptr) {} AccessorImplHost *getReqToFill() { return MPtr; } @@ -289,18 +289,18 @@ class CGCopyUSM : public CG { /// "Fill USM" command group class. class CGFillUSM : public CG { - std::vector MPattern; + std::vector MPattern; void *MDst; size_t MLength; public: - CGFillUSM(std::vector Pattern, void *DstPtr, size_t Length, + CGFillUSM(std::vector Pattern, void *DstPtr, size_t Length, CG::StorageInitHelper CGData, detail::code_location loc = {}) : CG(FillUSM, std::move(CGData), std::move(loc)), MPattern(std::move(Pattern)), MDst(DstPtr), MLength(Length) {} void *getDst() { return MDst; } size_t getLength() { return MLength; } - int getFill() { return MPattern[0]; } + const std::vector &getPattern() { return MPattern; } }; /// "Prefetch USM" command group class. @@ -378,14 +378,14 @@ class CGCopy2DUSM : public CG { /// "Fill 2D USM" command group class. class CGFill2DUSM : public CG { - std::vector MPattern; + std::vector MPattern; void *MDst; size_t MPitch; size_t MWidth; size_t MHeight; public: - CGFill2DUSM(std::vector Pattern, void *DstPtr, size_t Pitch, + CGFill2DUSM(std::vector Pattern, void *DstPtr, size_t Pitch, size_t Width, size_t Height, CG::StorageInitHelper CGData, detail::code_location loc = {}) : CG(Fill2DUSM, std::move(CGData), std::move(loc)), @@ -395,7 +395,7 @@ class CGFill2DUSM : public CG { size_t getPitch() const { return MPitch; } size_t getWidth() const { return MWidth; } size_t getHeight() const { return MHeight; } - const std::vector &getPattern() const { return MPattern; } + const std::vector &getPattern() const { return MPattern; } }; /// "Memset 2D USM" command group class. diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index f2acd7e897399..3a41c4881febf 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -129,7 +129,7 @@ _PI_API(piextUSMHostAlloc) _PI_API(piextUSMDeviceAlloc) _PI_API(piextUSMSharedAlloc) _PI_API(piextUSMFree) -_PI_API(piextUSMEnqueueMemset) +_PI_API(piextUSMEnqueueFill) _PI_API(piextUSMEnqueueMemcpy) _PI_API(piextUSMEnqueuePrefetch) _PI_API(piextUSMEnqueueMemAdvise) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index f541c3e4e89d2..66f0a57a3060b 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -196,9 +196,10 @@ // _pi_virtual_mem_granularity_info enum, _pi_virtual_mem_info enum and // pi_virtual_access_flags bit flags. // 15.55 Added piextEnqueueNativeCommand as well as associated types and enums +// 16.56 Replaced piextUSMEnqueueMemset with piextUSMEnqueueFill -#define _PI_H_VERSION_MAJOR 15 -#define _PI_H_VERSION_MINOR 55 +#define _PI_H_VERSION_MAJOR 16 +#define _PI_H_VERSION_MINOR 56 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -2174,22 +2175,22 @@ __SYCL_EXPORT pi_result piextUSMPitchedAlloc( /// \param ptr is the memory to be freed __SYCL_EXPORT pi_result piextUSMFree(pi_context context, void *ptr); -/// USM Memset API +/// USM Fill API /// /// \param queue is the queue to submit to -/// \param ptr is the ptr to memset -/// \param value is value to set. It is interpreted as an 8-bit value and the -/// upper -/// 24 bits are ignored -/// \param count is the size in bytes to memset +/// \param ptr is the ptr to fill +/// \param pattern is the ptr with the bytes of the pattern to set +/// \param patternSize is the size in bytes of the pattern to set +/// \param count is the size in bytes to fill /// \param num_events_in_waitlist is the number of events to wait on /// \param events_waitlist is an array of events to wait on /// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, - pi_int32 value, size_t count, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event); +__SYCL_EXPORT pi_result piextUSMEnqueueFill(pi_queue queue, void *ptr, + const void *pattern, + size_t patternSize, size_t count, + pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event); /// USM Memcpy API /// diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 7f5a1153f4fd0..a26da04193e3b 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -2826,10 +2826,14 @@ class __SYCL_EXPORT handler { setUserFacingNodeType(ext::oneapi::experimental::node_type::memfill); static_assert(is_device_copyable::value, "Pattern must be device copyable"); - parallel_for<__usmfill>(range<1>(Count), [=](id<1> Index) { - T *CastedPtr = static_cast(Ptr); - CastedPtr[Index] = Pattern; - }); + if (getDeviceBackend() == backend::ext_oneapi_level_zero) { + parallel_for<__usmfill>(range<1>(Count), [=](id<1> Index) { + T *CastedPtr = static_cast(Ptr); + CastedPtr[Index] = Pattern; + }); + } else { + this->fill_impl(Ptr, &Pattern, sizeof(T), Count); + } } /// Prevents any commands submitted afterward to this queue from executing @@ -3297,7 +3301,7 @@ class __SYCL_EXPORT handler { /// Length to copy or fill (for USM operations). size_t MLength = 0; /// Pattern that is used to fill memory object in case command type is fill. - std::vector MPattern; + std::vector MPattern; /// Storage for a lambda or function object. std::unique_ptr MHostKernel; /// Storage for lambda/function when using HostTask @@ -3442,6 +3446,10 @@ class __SYCL_EXPORT handler { // Helper function for getting a loose bound on work-items. id<2> computeFallbackKernelBounds(size_t Width, size_t Height); + // Function to get information about the backend for which the code is + // compiled for + backend getDeviceBackend() const; + // Common function for launching a 2D USM memcpy kernel to avoid redefinitions // of the kernel from copy and memcpy. template @@ -3553,6 +3561,9 @@ class __SYCL_EXPORT handler { }); } + // Implementation of USM fill using command for native fill. + void fill_impl(void *Dest, const void *Value, size_t ValueSize, size_t Count); + // Implementation of ext_oneapi_memcpy2d using command for native 2D memcpy. void ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height); diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 13b44ce9a701d..e58d59547c310 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -930,12 +930,12 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, - size_t Count, pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 074909bcd6736..97c1e72fde422 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -933,12 +933,12 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, - size_t Count, pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 61ddafc09ccdc..93fab8d0fabb8 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -957,23 +957,22 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -/// USM Memset API +/// USM Fill API /// /// @param Queue is the queue to submit to -/// @param Ptr is the ptr to memset -/// @param Value is value to set. It is interpreted as an 8-bit value and the -/// upper -/// 24 bits are ignored -/// @param Count is the size in bytes to memset +/// @param Ptr is the ptr to fill +/// \param Pattern is the ptr with the bytes of the pattern to set +/// \param PatternSize is the size in bytes of the pattern to set +/// @param Count is the size in bytes to fill /// @param NumEventsInWaitlist is the number of events to wait on /// @param EventsWaitlist is an array of events to wait on /// @param Event is the event that represents this operation -pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, - size_t Count, pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/native_cpu/pi_native_cpu.cpp b/sycl/plugins/native_cpu/pi_native_cpu.cpp index e1f40fcbcd16b..8b577c4c997e3 100644 --- a/sycl/plugins/native_cpu/pi_native_cpu.cpp +++ b/sycl/plugins/native_cpu/pi_native_cpu.cpp @@ -933,12 +933,12 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, - size_t Count, pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index c569adb4a8839..890d0fdecf79b 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -889,12 +889,12 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, - size_t Count, pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 5e63a9c49e6a2..7dd2e0bbbac8c 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -3913,11 +3913,12 @@ inline pi_result piEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, return PI_SUCCESS; } -inline pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, - pi_int32 Value, size_t Count, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { +inline pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, + const void *Pattern, size_t PatternSize, + size_t Count, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *OutEvent) { PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); if (!Ptr) { return PI_ERROR_INVALID_VALUE; @@ -3929,8 +3930,7 @@ inline pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - size_t PatternSize = 1; - HANDLE_ERRORS(urEnqueueUSMFill(UrQueue, Ptr, PatternSize, &Value, Count, + HANDLE_ERRORS(urEnqueueUSMFill(UrQueue, Ptr, PatternSize, Pattern, Count, NumEventsInWaitList, UrEventsWaitList, UREvent)); diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp index 59d6b27017eae..717fd6c895331 100644 --- a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp +++ b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp @@ -442,24 +442,24 @@ __SYCL_EXPORT pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, ParamValueSizeRet); } -/// USM Memset API +/// USM Fill API /// -/// @param Queue is the queue to submit to -/// @param Ptr is the ptr to memset -/// @param Value is value to set. It is interpreted as an 8-bit value and the -/// upper -/// 24 bits are ignored -/// @param Count is the size in bytes to memset -/// @param NumEventsInWaitlist is the number of events to wait on -/// @param EventsWaitlist is an array of events to wait on -/// @param Event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, - pi_int32 Value, size_t Count, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +/// \param queue is the queue to submit to +/// \param ptr is the ptr to fill +/// \param pattern is the ptr with the bytes of the pattern to set +/// \param patternSize is the size in bytes of the pattern to set +/// \param count is the size in bytes to fill +/// \param num_events_in_waitlist is the number of events to wait on +/// \param events_waitlist is an array of events to wait on +/// \param event is the event that represents this operation +__SYCL_EXPORT pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, + const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, + pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } __SYCL_EXPORT pi_result piEnqueueMemBufferCopyRect( @@ -1598,7 +1598,7 @@ __SYCL_EXPORT pi_result piPluginInit(pi_plugin *PluginInit) { _PI_API(piEnqueueMemBufferMap) _PI_API(piEnqueueMemUnmap) _PI_API(piEnqueueMemBufferFill) - _PI_API(piextUSMEnqueueMemset) + _PI_API(piextUSMEnqueueFill) _PI_API(piEnqueueMemBufferCopyRect) _PI_API(piEnqueueMemBufferCopy) _PI_API(piextUSMEnqueueMemcpy) diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 9d13a87ed13e9..fe8fc14842d6e 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -703,8 +703,10 @@ class node_impl { sycl::detail::CGFillUSM *FillUSM = static_cast(MCommandGroup.get()); Stream << "Dst: " << FillUSM->getDst() - << " Length: " << FillUSM->getLength() - << " Pattern: " << FillUSM->getFill() << "\\n"; + << " Length: " << FillUSM->getLength() << " Pattern: "; + for (auto byte : FillUSM->getPattern()) + Stream << byte; + Stream << "\\n"; } break; case sycl::detail::CG::CGTYPE::PrefetchUSM: diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 6cfa71d156062..8a6f027dd8e27 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -807,7 +807,7 @@ void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, } void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, - size_t PatternSize, const char *Pattern, + size_t PatternSize, const unsigned char *Pattern, unsigned int Dim, sycl::range<3> MemRange, sycl::range<3> AccRange, sycl::id<3> Offset, unsigned int ElementSize, @@ -951,7 +951,7 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, } void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, - int Pattern, + const std::vector &Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { @@ -972,9 +972,9 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); const PluginPtr &Plugin = Queue->getPlugin(); - Plugin->call( - Queue->getHandleRef(), Mem, Pattern, Length, DepEvents.size(), - DepEvents.data(), OutEvent); + Plugin->call( + Queue->getHandleRef(), Mem, Pattern.data(), Pattern.size(), Length, + DepEvents.size(), DepEvents.data(), OutEvent); } void MemoryManager::prefetch_usm( @@ -1082,7 +1082,7 @@ void MemoryManager::copy_2d_usm( void MemoryManager::fill_2d_usm( void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, - const std::vector &Pattern, + const std::vector &Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { @@ -1570,7 +1570,8 @@ void MemoryManager::ext_oneapi_copy_usm_cmd_buffer( void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, - size_t Len, int Pattern, std::vector Deps, + size_t Len, const std::vector &Pattern, + std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { if (!DstMem) @@ -1578,19 +1579,18 @@ void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( PI_ERROR_INVALID_VALUE); const PluginPtr &Plugin = Context->getPlugin(); - // Pattern is interpreted as an unsigned char so pattern size is always 1. - size_t PatternSize = 1; + Plugin->call( - CommandBuffer, DstMem, &Pattern, PatternSize, Len, Deps.size(), + CommandBuffer, DstMem, Pattern.data(), Pattern.size(), Len, Deps.size(), Deps.data(), OutSyncPoint); } void MemoryManager::ext_oneapi_fill_cmd_buffer( sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj, - void *Mem, size_t PatternSize, const char *Pattern, unsigned int Dim, - sycl::range<3> Size, sycl::range<3> AccessRange, sycl::id<3> AccessOffset, - unsigned int ElementSize, + void *Mem, size_t PatternSize, const unsigned char *Pattern, + unsigned int Dim, sycl::range<3> Size, sycl::range<3> AccessRange, + sycl::id<3> AccessOffset, unsigned int ElementSize, std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index a47fedaedfe02..793d3b2c48252 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -120,9 +120,10 @@ class MemoryManager { const detail::EventImplPtr &OutEventImpl); static void fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, - size_t PatternSize, const char *Pattern, unsigned int Dim, - sycl::range<3> Size, sycl::range<3> AccessRange, - sycl::id<3> AccessOffset, unsigned int ElementSize, + size_t PatternSize, const unsigned char *Pattern, + unsigned int Dim, sycl::range<3> Size, + sycl::range<3> AccessRange, sycl::id<3> AccessOffset, + unsigned int ElementSize, std::vector DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl); @@ -146,7 +147,7 @@ class MemoryManager { const detail::EventImplPtr &OutEventImpl); static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, - int Pattern, + const std::vector &Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); @@ -171,7 +172,7 @@ class MemoryManager { static void fill_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, - const std::vector &Pattern, + const std::vector &Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); @@ -241,7 +242,7 @@ class MemoryManager { static void ext_oneapi_fill_usm_cmd_buffer( sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, - size_t Len, int Pattern, + size_t Len, const std::vector &Pattern, std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); @@ -249,7 +250,7 @@ class MemoryManager { ext_oneapi_fill_cmd_buffer(sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj, void *Mem, - size_t PatternSize, const char *Pattern, + size_t PatternSize, const unsigned char *Pattern, unsigned int Dim, sycl::range<3> Size, sycl::range<3> AccessRange, sycl::id<3> AccessOffset, unsigned int ElementSize, diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 066bc83fb2427..4adb0f2dccac1 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -172,12 +172,12 @@ event queue_impl::memset(const std::shared_ptr &Self, // Emit a begin/end scope for this call PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin); #endif - + const std::vector Pattern{static_cast(Value)}; return submitMemOpHelper( Self, DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); }, [](const auto &...Args) { MemoryManager::fill_usm(Args...); }, Ptr, Self, - Count, Value); + Count, Pattern); } void report(const code_location &CodeLoc) { @@ -214,7 +214,7 @@ event queue_impl::memcpy(const std::shared_ptr &Self, xpti::addMetadata(TEvent, "queue_id", MQueueID); }); xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); - // Notify XPTI about the memset submission + // Notify XPTI about the memcpy submission PrepareNotify.notify(); // Emit a begin/end scope for this call PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin); diff --git a/sycl/source/detail/reduction.cpp b/sycl/source/detail/reduction.cpp index 6fd170746d28d..a8a839b65b8f4 100644 --- a/sycl/source/detail/reduction.cpp +++ b/sycl/source/detail/reduction.cpp @@ -172,7 +172,7 @@ addCounterInit(handler &CGH, std::shared_ptr &Queue, auto EventImpl = std::make_shared(Queue); EventImpl->setContextImpl(detail::getSyclObjImpl(Queue->get_context())); EventImpl->setStateIncomplete(); - MemoryManager::fill_usm(Counter.get(), Queue, sizeof(int), 0, {}, + MemoryManager::fill_usm(Counter.get(), Queue, sizeof(int), {0}, {}, &EventImpl->getHandleRef(), EventImpl); CGH.depends_on(createSyclObjFromImpl(EventImpl)); } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index b9b2fd6c5b280..583a3c5f0c386 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2854,7 +2854,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); MemoryManager::ext_oneapi_fill_usm_cmd_buffer( MQueue->getContextImplPtr(), MCommandBuffer, Fill->getDst(), - Fill->getLength(), Fill->getFill(), std::move(MSyncPointDeps), + Fill->getLength(), Fill->getPattern(), std::move(MSyncPointDeps), &OutSyncPoint); MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; @@ -3019,7 +3019,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { case CG::CGTYPE::FillUSM: { CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); MemoryManager::fill_usm(Fill->getDst(), MQueue, Fill->getLength(), - Fill->getFill(), std::move(RawEvents), Event, + Fill->getPattern(), std::move(RawEvents), Event, MEvent); return PI_SUCCESS; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 14aec10e67313..7dae7fdbf5726 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -956,6 +956,15 @@ void handler::mem_advise(const void *Ptr, size_t Count, int Advice) { setType(detail::CG::AdviseUSM); } +void handler::fill_impl(void *Dest, const void *Value, size_t ValueSize, + size_t Count) { + MDstPtr = Dest; + MPattern.resize(ValueSize); + std::memcpy(MPattern.data(), Value, ValueSize); + MLength = Count * ValueSize; + setType(detail::CG::FillUSM); +} + void handler::ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height) { @@ -986,7 +995,7 @@ void handler::ext_oneapi_memset2d_impl(void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height) { // Checks done in callers. MDstPtr = Dest; - MPattern.push_back(static_cast(Value)); + MPattern.push_back(static_cast(Value)); MImpl->MDstPitch = DestPitch; MImpl->MWidth = Width; MImpl->MHeight = Height; @@ -1614,6 +1623,13 @@ id<2> handler::computeFallbackKernelBounds(size_t Width, size_t Height) { return id<2>{std::min(ItemLimit[0], Height), std::min(ItemLimit[1], Width)}; } +backend handler::getDeviceBackend() const { + if (MGraph) + return MGraph->getDevice().get_backend(); + else + return MQueue->getDeviceImplPtr()->getBackend(); +} + void handler::ext_intel_read_host_pipe(detail::string_view Name, void *Ptr, size_t Size, bool Block) { MImpl->HostPipeName = Name.data(); diff --git a/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp b/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp index 48ab65c68896c..111e72d121cfc 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp @@ -1,6 +1,10 @@ // RUN: %{build} -o %t.out - -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt +// +// On level_zero Q.fill uses piEnqueueKernelLaunch and not piextUSMEnqueueFill +// due to https://github.com/intel/llvm/issues/13787 +// +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt --check-prefixes=CHECK%if level_zero %{,CHECK-L0%} %else %{,CHECK-OTHER%} +// // REQUIRES: aspect-usm_shared_allocations // The test checks that the last parameter is `nullptr` for all PI calls that // should discard events. @@ -12,7 +16,7 @@ // Since it is a warning it is safe to ignore for this test. // // Everything that follows TestQueueOperations() -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -20,8 +24,9 @@ // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // -// Q.fill don't use piEnqueueMemBufferFill -// CHECK: ---> piEnqueueKernelLaunch( +// Level-zero backend doesn't use piextUSMEnqueueFill +// CHECK-L0: ---> piEnqueueKernelLaunch( +// CHECK-OTHER: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -47,7 +52,7 @@ // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // // RegularQueue -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -57,7 +62,7 @@ // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // // Everything that follows TestQueueOperationsViaSubmit() -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -65,8 +70,9 @@ // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // -// Q.fill don't use piEnqueueMemBufferFill -// CHECK: ---> piEnqueueKernelLaunch( +// Level-zero backend doesn't use piextUSMEnqueueFill +// CHECK-L0: ---> piEnqueueKernelLaunch( +// CHECK-OTHER: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -92,7 +98,7 @@ // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // // RegularQueue -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS diff --git a/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp b/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp index 96d53a632beb6..d27be6ab48abc 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp @@ -1,6 +1,10 @@ // RUN: %{build} -o %t.out - -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt +// +// On level_zero Q.fill uses piEnqueueKernelLaunch and not piextUSMEnqueueFill +// due to https://github.com/intel/llvm/issues/13787 +// +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt --check-prefixes=CHECK%if level_zero %{,CHECK-L0%} %else %{,CHECK-OTHER%} +// // REQUIRES: aspect-usm_shared_allocations // The test checks that the last parameter is not `nullptr` for all PI calls // that should discard events. @@ -12,7 +16,7 @@ // Since it is a warning it is safe to ignore for this test. // // Everything that follows TestQueueOperations() -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -22,8 +26,9 @@ // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS // -// Q.fill don't use piEnqueueMemBufferFill -// CHECK: ---> piEnqueueKernelLaunch( +// Level-zero backend doesn't use piextUSMEnqueueFill +// CHECK-L0: ---> piEnqueueKernelLaunch( +// CHECK-OTHER: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -58,7 +63,7 @@ // CHECK: ---> pi_result : PI_SUCCESS // // RegularQueue -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -69,7 +74,7 @@ // CHECK: ---> pi_result : PI_SUCCESS // // Everything that follows TestQueueOperationsViaSubmit() -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -79,8 +84,9 @@ // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS // -// Q.fill don't use piEnqueueMemBufferFill -// CHECK: ---> piEnqueueKernelLaunch( +// Level-zero backend doesn't use piextUSMEnqueueFill +// CHECK-L0: ---> piEnqueueKernelLaunch( +// CHECK-OTHER: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -115,7 +121,7 @@ // CHECK: ---> pi_result : PI_SUCCESS // // RegularQueue -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS diff --git a/sycl/test-e2e/Graph/Explicit/usm_fill.cpp b/sycl/test-e2e/Graph/Explicit/usm_fill.cpp index febcd7249a086..37eb23e4e6549 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_fill.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_fill.cpp @@ -5,6 +5,8 @@ // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl #define GRAPH_E2E_EXPLICIT diff --git a/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp b/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp index acadbf01e0f91..583fa998794b0 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp @@ -7,6 +7,9 @@ // REQUIRES: aspect-usm_host_allocations +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl + #define GRAPH_E2E_EXPLICIT #include "../Inputs/usm_fill_host.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp b/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp index 4752b1fdbbfc6..55fa99d1a39a6 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp @@ -7,6 +7,9 @@ // REQUIRES: aspect-usm_shared_allocations +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl + #define GRAPH_E2E_EXPLICIT #include "../Inputs/usm_fill_shared.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp index 5d38087527a74..d09ca4aa3b9b0 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp @@ -5,6 +5,8 @@ // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl #define GRAPH_E2E_RECORD_REPLAY diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp index 07acad25287af..8dc20d600be8f 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp @@ -7,6 +7,9 @@ // REQUIRES: aspect-usm_host_allocations +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl + #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/usm_fill_host.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp index 7afbf67ebe788..557fbcce4a069 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp @@ -7,6 +7,9 @@ // REQUIRES: aspect-usm_shared_allocations +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl + #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/usm_fill_shared.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp index 9506e99cd73ad..84a5d42f0da04 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp @@ -2,6 +2,8 @@ // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{ %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl // // Tests adding a USM memset queue shortcut operation as a graph node. diff --git a/sycl/test/abi/layout_handler.cpp b/sycl/test/abi/layout_handler.cpp index 710f6c4991782..cc073064c724c 100644 --- a/sycl/test/abi/layout_handler.cpp +++ b/sycl/test/abi/layout_handler.cpp @@ -7,9 +7,7 @@ void foo() { sycl::queue Q; - Q.submit([](sycl::handler &CGH) { - CGH.single_task([]() {}); - }); + Q.submit([](sycl::handler &CGH) { CGH.single_task([]() {}); }); } // clang-format off @@ -118,10 +116,10 @@ void foo() { // CHECK-NEXT: 384 | void * MSrcPtr // CHECK-NEXT: 392 | void * MDstPtr // CHECK-NEXT: 400 | size_t MLength -// CHECK-NEXT: 408 | class std::vector MPattern -// CHECK-NEXT: 408 | struct std::_Vector_base > (base) -// CHECK-NEXT: 408 | struct std::_Vector_base >::_Vector_impl _M_impl -// CHECK-NEXT: 408 | class std::allocator (base) (empty) +// CHECK-NEXT: 408 | class std::vector MPattern +// CHECK-NEXT: 408 | struct std::_Vector_base > (base) +// CHECK-NEXT: 408 | struct std::_Vector_base >::_Vector_impl _M_impl +// CHECK-NEXT: 408 | class std::allocator (base) (empty) // CHECK: 408 | pointer _M_start // CHECK-NEXT: 416 | pointer _M_finish // CHECK-NEXT: 424 | pointer _M_end_of_storage diff --git a/sycl/test/abi/pi_cuda_symbol_check.dump b/sycl/test/abi/pi_cuda_symbol_check.dump index 4808f55c487cd..7ec2569938519 100644 --- a/sycl/test/abi/pi_cuda_symbol_check.dump +++ b/sycl/test/abi/pi_cuda_symbol_check.dump @@ -161,11 +161,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc +piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/pi_hip_symbol_check.dump b/sycl/test/abi/pi_hip_symbol_check.dump index 2c421ad2fec55..a75fc9af455c9 100644 --- a/sycl/test/abi/pi_hip_symbol_check.dump +++ b/sycl/test/abi/pi_hip_symbol_check.dump @@ -161,11 +161,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc +piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index 7e8fc7500f4a4..53c7f0e2ed531 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -160,11 +160,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc +piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/pi_nativecpu_symbol_check.dump b/sycl/test/abi/pi_nativecpu_symbol_check.dump index 10f19aac80652..3d347196f757d 100644 --- a/sycl/test/abi/pi_nativecpu_symbol_check.dump +++ b/sycl/test/abi/pi_nativecpu_symbol_check.dump @@ -161,11 +161,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc +piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index c0e1b76c4d04f..7e174ffbb2a4c 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -160,11 +160,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc +piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index ff46e12c50335..e96b3ec995331 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3537,6 +3537,7 @@ _ZN4sycl3_V17handler6memcpyEPvPKvm _ZN4sycl3_V17handler6memsetEPvim _ZN4sycl3_V17handler8finalizeEv _ZN4sycl3_V17handler8prefetchEPKvm +_ZN4sycl3_V17handler9fill_implEPvPKvmm _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEb @@ -4120,6 +4121,7 @@ _ZNK4sycl3_V17context8get_infoINS0_4info7context8platformEEENS0_6detail20is_cont _ZNK4sycl3_V17context9getNativeEv _ZNK4sycl3_V17handler11eventNeededEv _ZNK4sycl3_V17handler15getCommandGraphEv +_ZNK4sycl3_V17handler16getDeviceBackendEv _ZNK4sycl3_V17handler17getContextImplPtrEv _ZNK4sycl3_V17handler27isStateExplicitKernelBundleEv _ZNK4sycl3_V17handler30getOrInsertHandlerKernelBundleEb diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 939b0ffa647ac..0bc26c55b8a73 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4068,6 +4068,7 @@ ?ext_oneapi_wait_external_semaphore@queue@_V1@sycl@@QEAA?AVevent@23@Uinterop_semaphore_handle@experimental@oneapi@ext@23@_KV423@AEBUcode_location@detail@23@@Z ?extractArgsAndReqs@handler@_V1@sycl@@AEAAXXZ ?extractArgsAndReqsFromLambda@handler@_V1@sycl@@AEAAXPEAD_KPEBUkernel_param_desc_t@detail@23@_N@Z +?fill_impl@handler@_V1@sycl@@AEAAXPEAXPEBX_K2@Z ?finalize@handler@_V1@sycl@@AEAA?AVevent@23@XZ ?finalize@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$command_graph@$00@34567@AEBVproperty_list@67@@Z ?finalizeImpl@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXXZ @@ -4104,6 +4105,7 @@ ?getCommandGraph@handler@_V1@sycl@@AEBA?AV?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@XZ ?getContextImplPtr@handler@_V1@sycl@@AEBAAEBV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@XZ ?getCurrentDSODir@OSUtil@detail@_V1@sycl@@SA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ +?getDeviceBackend@handler@_V1@sycl@@AEBA?AW4backend@23@XZ ?getDeviceFromHandler@detail@_V1@sycl@@YA?AVdevice@23@AEAVhandler@23@@Z ?getDirName@OSUtil@detail@_V1@sycl@@SA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEBD@Z ?getElemSize@AccessorBaseHost@detail@_V1@sycl@@QEBAIXZ diff --git a/sycl/tools/xpti_helpers/usm_analyzer.hpp b/sycl/tools/xpti_helpers/usm_analyzer.hpp index 9cdf27dd98ddc..1eee3474c51ed 100644 --- a/sycl/tools/xpti_helpers/usm_analyzer.hpp +++ b/sycl/tools/xpti_helpers/usm_analyzer.hpp @@ -214,8 +214,8 @@ class USMAnalyzer { USMAnalyzer::handleUSMSharedAlloc); ArgHandlerPreCall.set_piextUSMFree(USMAnalyzer::handleUSMFree); ArgHandlerPreCall.set_piMemBufferCreate(USMAnalyzer::handleMemBufferCreate); - ArgHandlerPreCall.set_piextUSMEnqueueMemset( - USMAnalyzer::handleUSMEnqueueMemset); + ArgHandlerPreCall.set_piextUSMEnqueueFill( + USMAnalyzer::handleUSMEnqueueFill); ArgHandlerPreCall.set_piextUSMEnqueueMemcpy( USMAnalyzer::handleUSMEnqueueMemcpy); ArgHandlerPreCall.set_piextUSMEnqueuePrefetch( @@ -350,11 +350,11 @@ class USMAnalyzer { } } - static void handleUSMEnqueueMemset(const pi_plugin &, - std::optional, pi_queue, - void *ptr, pi_int32, size_t numBytes, - pi_uint32, const pi_event *, pi_event *) { - CheckPointerValidness("input parameter", ptr, numBytes, "memset"); + static void handleUSMEnqueueFill(const pi_plugin &, std::optional, + pi_queue, void *ptr, const void *, size_t, + size_t numBytes, pi_uint32, const pi_event *, + pi_event *) { + CheckPointerValidness("input parameter", ptr, numBytes, "fill"); } static void handleUSMEnqueueMemcpy(const pi_plugin &, diff --git a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp index 842e3cf271216..10c86342b3a4c 100644 --- a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp +++ b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp @@ -61,12 +61,12 @@ inline pi_result redefined_piextUSMEnqueueMemcpy(pi_queue, pi_bool, void *, return PI_SUCCESS; } -thread_local size_t counter_piextUSMEnqueueMemset = 0; -inline pi_result redefined_piextUSMEnqueueMemset(pi_queue, void *, pi_int32, - size_t, pi_uint32, - const pi_event *, - pi_event *event) { - ++counter_piextUSMEnqueueMemset; +thread_local size_t counter_piextUSMEnqueueFill = 0; +inline pi_result redefined_piextUSMEnqueueFill(pi_queue, void *, const void *, + size_t, size_t, pi_uint32, + const pi_event *, + pi_event *event) { + ++counter_piextUSMEnqueueFill; EXPECT_EQ(event, nullptr); return PI_SUCCESS; } @@ -112,7 +112,7 @@ class EnqueueFunctionsEventsTests : public ::testing::Test { void SetUp() override { counter_piEnqueueKernelLaunch = 0; counter_piextUSMEnqueueMemcpy = 0; - counter_piextUSMEnqueueMemset = 0; + counter_piextUSMEnqueueFill = 0; counter_piextUSMEnqueuePrefetch = 0; counter_piextUSMEnqueueMemAdvise = 0; counter_piEnqueueEventsWaitWithBarrier = 0; @@ -362,8 +362,8 @@ TEST_F(EnqueueFunctionsEventsTests, CopyShortcutNoEvent) { } TEST_F(EnqueueFunctionsEventsTests, SubmitMemsetNoEvent) { - Mock.redefine( - redefined_piextUSMEnqueueMemset); + Mock.redefine( + redefined_piextUSMEnqueueFill); constexpr size_t N = 1024; int *Dst = malloc_shared(N, Q); @@ -372,21 +372,21 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitMemsetNoEvent) { oneapiext::memset(CGH, Dst, int{1}, sizeof(int) * N); }); - ASSERT_EQ(counter_piextUSMEnqueueMemset, size_t{1}); + ASSERT_EQ(counter_piextUSMEnqueueFill, size_t{1}); free(Dst, Q); } TEST_F(EnqueueFunctionsEventsTests, MemsetShortcutNoEvent) { - Mock.redefine( - redefined_piextUSMEnqueueMemset); + Mock.redefine( + redefined_piextUSMEnqueueFill); constexpr size_t N = 1024; int *Dst = malloc_shared(N, Q); oneapiext::memset(Q, Dst, 1, sizeof(int) * N); - ASSERT_EQ(counter_piextUSMEnqueueMemset, size_t{1}); + ASSERT_EQ(counter_piextUSMEnqueueFill, size_t{1}); free(Dst, Q); } diff --git a/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp b/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp index 27967973c1363..3860833256048 100644 --- a/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp +++ b/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp @@ -75,9 +75,9 @@ pi_result redefinedEventGetInfo(pi_event event, pi_event_info param_name, return PI_SUCCESS; } -static pi_result redefinedUSMEnqueueMemset(pi_queue, void *, pi_int32, size_t, - pi_uint32, const pi_event *, - pi_event *event) { +static pi_result redefinedUSMEnqueueMemset(pi_queue, void *, const void *, + size_t, size_t, pi_uint32, + const pi_event *, pi_event *event) { *event = reinterpret_cast(new int{}); return PI_SUCCESS; } @@ -97,7 +97,7 @@ TEST(GetNative, GetNativeHandle) { Mock.redefineBefore(redefinedMemRetain); Mock.redefineBefore( redefinedMemBufferCreate); - Mock.redefineBefore( + Mock.redefineBefore( redefinedUSMEnqueueMemset); context Context(Plt); diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 4e41a88c14544..5541db5688c53 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1259,13 +1259,15 @@ inline pi_result mock_piextUSMFree(pi_context context, void *ptr) { return PI_SUCCESS; } -inline pi_result mock_piextUSMEnqueueMemset(pi_queue queue, void *ptr, - pi_int32 value, size_t count, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { +inline pi_result mock_piextUSMEnqueueFill(pi_queue queue, void *ptr, + const void *pattern, + size_t patternSize, size_t count, + pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event) { if (event) *event = createDummyHandle(); + return PI_SUCCESS; } diff --git a/sycl/unittests/queue/USM.cpp b/sycl/unittests/queue/USM.cpp index 30cdd2b37393e..03ff8c538d7ed 100644 --- a/sycl/unittests/queue/USM.cpp +++ b/sycl/unittests/queue/USM.cpp @@ -48,8 +48,8 @@ pi_result redefinedUSMEnqueueMemcpyAfter(pi_queue, pi_bool, void *, return PI_SUCCESS; } -pi_result redefinedUSMEnqueueMemsetAfter(pi_queue, void *, pi_int32, size_t, - pi_uint32, const pi_event *, +pi_result redefinedUSMEnqueueMemsetAfter(pi_queue, void *, const void *, size_t, + size_t, pi_uint32, const pi_event *, pi_event *Event) { // Set MEMSET to the event produced by the original USMEnqueueMemcpy MEMSET = *Event; @@ -64,7 +64,7 @@ TEST(USM, NoOpPreservesDependencyChain) { redefinedEnqueueEventsWaitAfter); Mock.redefineAfter( redefinedUSMEnqueueMemcpyAfter); - Mock.redefineAfter( + Mock.redefineAfter( redefinedUSMEnqueueMemsetAfter); context Ctx{Plt.get_devices()[0]}; diff --git a/sycl/unittests/queue/Wait.cpp b/sycl/unittests/queue/Wait.cpp index 8b2d72055d847..f9ec3fb6083ac 100644 --- a/sycl/unittests/queue/Wait.cpp +++ b/sycl/unittests/queue/Wait.cpp @@ -37,7 +37,8 @@ pi_result redefinedQueueCreateEx(pi_context context, pi_device device, return PI_SUCCESS; } -pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, +pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, + const void *Pattern, size_t PatternSize, size_t Count, pi_uint32 Num_events_in_waitlist, const pi_event *Events_waitlist, @@ -88,7 +89,7 @@ TEST(QueueWait, QueueWaitTest) { Mock.redefineBefore( redefinedQueueCreateEx); Mock.redefineBefore(redefinedQueueFinish); - Mock.redefineBefore( + Mock.redefineBefore( redefinedUSMEnqueueMemset); Mock.redefineBefore(redefinedEventsWait); Mock.redefineBefore( diff --git a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp index 929f8735bc85f..58b28237a4958 100644 --- a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp @@ -61,7 +61,8 @@ inline pi_result customEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, ExecutedCommands.push_back({CommandType::KERNEL, EventsCount}); return PI_SUCCESS; } -inline pi_result customextUSMEnqueueMemset(pi_queue, void *, pi_int32, size_t, +inline pi_result customextUSMEnqueueMemset(pi_queue, void *, const void *, + size_t, size_t, pi_uint32 EventsCount, const pi_event *, pi_event *) { ExecutedCommands.push_back({CommandType::MEMSET, EventsCount}); @@ -73,7 +74,7 @@ TEST_F(SchedulerTest, InOrderQueueCrossDeps) { sycl::unittest::PiMock Mock; Mock.redefineBefore( customEnqueueKernelLaunch); - Mock.redefineBefore( + Mock.redefineBefore( customextUSMEnqueueMemset); sycl::platform Plt = Mock.getPlatform(); @@ -126,7 +127,7 @@ TEST_F(SchedulerTest, InOrderQueueCrossDepsShortcutFuncs) { sycl::unittest::PiMock Mock; Mock.redefineBefore( customEnqueueKernelLaunch); - Mock.redefineBefore( + Mock.redefineBefore( customextUSMEnqueueMemset); sycl::platform Plt = Mock.getPlatform(); diff --git a/sycl/unittests/xpti_trace/QueueApiFailures.cpp b/sycl/unittests/xpti_trace/QueueApiFailures.cpp index 88c5fae49394c..c634f2dfce299 100644 --- a/sycl/unittests/xpti_trace/QueueApiFailures.cpp +++ b/sycl/unittests/xpti_trace/QueueApiFailures.cpp @@ -145,7 +145,8 @@ TEST_F(QueueApiFailures, QueueSingleTask) { EXPECT_FALSE(queryReceivedNotifications(TraceType, Message)); } -pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, +pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, + const void *Pattern, size_t PatternSize, size_t Count, pi_uint32 Num_events_in_waitlist, const pi_event *Events_waitlist, @@ -154,7 +155,7 @@ pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, } TEST_F(QueueApiFailures, QueueMemset) { - MockPlugin.redefine( + MockPlugin.redefine( redefinedUSMEnqueueMemset); MockPlugin.redefine( redefinedPluginGetLastError); @@ -241,18 +242,17 @@ TEST_F(QueueApiFailures, QueueCopy) { EXPECT_FALSE(queryReceivedNotifications(TraceType, Message)); } -pi_result redefinedEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, - const void *Pattern, size_t PatternSize, - size_t Offset, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { +pi_result redefinedUSMEnqueueFill(pi_queue Queue, void *Ptr, + const void *Pattern, size_t PatternSize, + size_t Count, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } TEST_F(QueueApiFailures, QueueFill) { - MockPlugin.redefine( - redefinedEnqueueMemBufferFill); + MockPlugin.redefine( + redefinedUSMEnqueueFill); MockPlugin.redefine( redefinedPluginGetLastError); sycl::queue Q;