From 9f952a9f222d618592e3bb11ccc8e8ea48a4815d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Fri, 2 Aug 2024 19:01:13 +0100 Subject: [PATCH 01/14] [SPEC] Add support for command-buffer kernel updates --- include/ur_api.h | 19 +++++++++++--- include/ur_ddi.h | 2 ++ include/ur_print.hpp | 23 +++++++++++++++++ scripts/core/EXP-COMMAND-BUFFER.rst | 7 +++--- scripts/core/exp-command-buffer.yml | 22 ++++++++++++---- source/adapters/cuda/command_buffer.cpp | 2 ++ source/adapters/hip/command_buffer.cpp | 2 ++ source/adapters/level_zero/command_buffer.cpp | 2 ++ source/adapters/mock/ur_mockddi.cpp | 9 +++++++ source/adapters/native_cpu/command_buffer.cpp | 2 +- source/adapters/opencl/command_buffer.cpp | 2 ++ source/loader/layers/tracing/ur_trcddi.cpp | 13 ++++++++-- source/loader/layers/validation/ur_valddi.cpp | 15 +++++++++-- source/loader/ur_ldrddi.cpp | 25 +++++++++++++++++-- source/loader/ur_libapi.cpp | 24 ++++++++++++------ source/ur_api.cpp | 16 +++++++++--- .../buffer_fill_kernel_update.cpp | 11 +++++++- .../buffer_saxpy_kernel_update.cpp | 4 ++- .../conformance/exp_command_buffer/fixtures.h | 6 +++-- .../exp_command_buffer/invalid_update.cpp | 13 +++++++--- .../exp_command_buffer/ndrange_update.cpp | 6 ++++- .../usm_fill_kernel_update.cpp | 8 ++++-- .../usm_saxpy_kernel_update.cpp | 8 ++++-- .../exp_enqueue_native/CMakeLists.txt | 23 ++++++++++++++--- 24 files changed, 218 insertions(+), 46 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index a707d40a3f..69320f04eb 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -8246,6 +8246,9 @@ typedef struct ur_exp_command_buffer_update_kernel_launch_desc_t { ur_structure_type_t stype; ///< [in] type of this structure, must be ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC const void *pNext; ///< [in][optional] pointer to extension-specific structure + ur_kernel_handle_t hNewKernel; ///< [in] The new kernel handle. 
If this value is equal to the current + ///< kernel handle associated + ///< with the command, then only the arguments to the kernel will be updated. uint32_t numNewMemObjArgs; ///< [in] Length of pNewMemObjArgList. uint32_t numNewPointerArgs; ///< [in] Length of pNewPointerArgList. uint32_t numNewValueArgs; ///< [in] Length of pNewValueArgList. @@ -8401,6 +8404,11 @@ urCommandBufferAppendKernelLaunchExp( const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t *pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t *pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in + ///< pKernelAlternatives. + ur_kernel_handle_t *phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels + ///< handles that might be used to update the kernel in this + ///< command after the command-buffer is finalized. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. @@ -8908,6 +8916,7 @@ urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hCommand` +/// + `NULL == pUpdateKernelLaunch->hNewKernel` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pUpdateKernelLaunch` /// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE @@ -8915,10 +8924,10 @@ urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. 
-/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim used on creation of `hCommand`. -/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. -/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value when `hCommand` was created with a NULL local work size. -/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value when `hCommand` was created with a non-NULL local work size. +/// + If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size. +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size. 
/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX @@ -11515,6 +11524,8 @@ typedef struct ur_command_buffer_append_kernel_launch_exp_params_t { const size_t **ppGlobalWorkOffset; const size_t **ppGlobalWorkSize; const size_t **ppLocalWorkSize; + uint32_t *pnumKernelAlternatives; + ur_kernel_handle_t **pphKernelAlternatives; uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 834c659c13..d2f79f4515 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1932,6 +1932,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendKernelLaunchExp_t)( const size_t *, const size_t *, uint32_t, + ur_kernel_handle_t *, + uint32_t, const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_command_handle_t *); diff --git a/include/ur_print.hpp b/include/ur_print.hpp index f71cc12b32..6bf77e4023 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -9921,6 +9921,12 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu ur::details::printStruct(os, (params.pNext)); + os << ", "; + os << ".hNewKernel = "; + + ur::details::printPtr(os, + (params.hNewKernel)); + os << ", "; os << ".numNewMemObjArgs = "; @@ -15919,6 +15925,23 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppLocalWorkSize)); + os << ", "; + os << ".numKernelAlternatives = "; + + os << *(params->pnumKernelAlternatives); + + os << ", "; + os << ".phKernelAlternatives = {"; + for (size_t i = 0; *(params->pphKernelAlternatives) != NULL && i < *params->pnumKernelAlternatives; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphKernelAlternatives))[i]); + } + os << "}"; + 
os << ", "; os << ".numSyncPointsInWaitList = "; diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index c23519cf67..7c03ba3211 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -144,8 +144,8 @@ were obtained from. // sync-point ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, 1, &syncPoint, - nullptr, nullptr); + pLocalWorkSize, 0, nullptr, 1, + &syncPoint, nullptr, nullptr); Enqueueing Command-Buffers -------------------------------------------------------------------------------- @@ -191,7 +191,7 @@ parameters to the kernel and the execution ND-Range. ${x}_exp_command_buffer_command_handle_t hCommand; ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, 0, nullptr, + pLocalWorkSize, 0, nullptr, 0, nullptr, nullptr, &hCommand); // Close the command-buffer before updating @@ -220,6 +220,7 @@ parameters to the kernel and the execution ND-Range. ${x}_exp_command_buffer_update_kernel_launch_desc_t update { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + hKernel, // hNewKernel 2, // numNewMemobjArgs 0, // numNewPointerArgs 0, // numNewValueArgs diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 72b4e63f74..f097270b4a 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -170,6 +170,11 @@ desc: "Descriptor type for updating a kernel launch command." base: $x_base_desc_t name: $x_exp_command_buffer_update_kernel_launch_desc_t members: + - type: $x_kernel_handle_t + name: hNewKernel + desc: | + [in] The new kernel handle. If this value is equal to the current kernel handle associated + with the command, then only the arguments to the kernel will be updated. 
- type: uint32_t name: numNewMemObjArgs desc: "[in] Length of pNewMemObjArgList." @@ -307,6 +312,14 @@ params: - type: "const size_t*" name: pLocalWorkSize desc: "[in][optional] Local work size to use when executing kernel." + - type: uint32_t + name: "numKernelAlternatives" + desc: "[in] The number of kernel alternatives provided in pKernelAlternatives." + - type: $x_kernel_handle_t* + name: "phKernelAlternatives" + desc: | + [in][optional][range(0, numKernelAlternatives)] List of kernels handles that might be used to update the kernel in this + command after the command-buffer is finalized. - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -909,17 +922,16 @@ params: - type: "const $x_exp_command_buffer_update_kernel_launch_desc_t*" name: pUpdateKernelLaunch desc: "[in] Struct defining how the kernel command is to be updated." - returns: - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: - "If update functionality is not supported by the device." - $X_RESULT_ERROR_INVALID_OPERATION: - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." - "If the command-buffer `hCommand` belongs to has not been finalized." - - "If `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim used on creation of `hCommand`." - - "If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL." - - "If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value when `hCommand` was created with a NULL local work size." - - "If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value when `hCommand` was created with a non-NULL local work size." 
+ - "If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL." + - "If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`." + - "If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size." + - "If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size." - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 2fdb6b08a3..ca1737c936 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -344,6 +344,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t /*numKernelAlternatives*/, + ur_kernel_handle_t * /*phKernelAlternatives*/, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint, diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 4ff38626af..ef6e6fe83c 
100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -312,6 +312,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t /*numKernelAlternatives*/, + ur_kernel_handle_t * /*phKernelAlternatives*/, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint, diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 1bf4f26716..16876976ca 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -733,6 +733,8 @@ ur_result_t urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, uint32_t WorkDim, const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, const size_t *LocalWorkSize, + uint32_t /*numKernelAlternatives*/, + ur_kernel_handle_t * /*phKernelAlternatives*/, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, ur_exp_command_buffer_sync_point_t *RetSyncPoint, diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index 20d9cc5bed..594a173ff1 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -8350,6 +8350,13 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + uint32_t + numKernelAlternatives, ///< [in] The number of kernel alternatives provided in + ///< pKernelAlternatives. 
+ ur_kernel_handle_t * + phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels + ///< handles that might be used to update the kernel in this + ///< command after the command-buffer is finalized. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * @@ -8369,6 +8376,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( &pGlobalWorkOffset, &pGlobalWorkSize, &pLocalWorkSize, + &numKernelAlternatives, + &phKernelAlternatives, &numSyncPointsInWaitList, &pSyncPointWaitList, &pSyncPoint, diff --git a/source/adapters/native_cpu/command_buffer.cpp b/source/adapters/native_cpu/command_buffer.cpp index fde6c03b86..2c5e350860 100644 --- a/source/adapters/native_cpu/command_buffer.cpp +++ b/source/adapters/native_cpu/command_buffer.cpp @@ -49,7 +49,7 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t) { UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t, ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, const size_t *, uint32_t, - const ur_exp_command_buffer_sync_point_t *, + ur_kernel_handle_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 5698f36928..34cb7f1a3c 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -140,6 +140,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t /*numKernelAlternatives*/, + ur_kernel_handle_t 
* /*phKernelAlternatives*/, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint, diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 7f37c23417..4be0fba5b0 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -6494,6 +6494,13 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + uint32_t + numKernelAlternatives, ///< [in] The number of kernel alternatives provided in + ///< pKernelAlternatives. + ur_kernel_handle_t * + phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels + ///< handles that might be used to update the kernel in this + ///< command after the command-buffer is finalized. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * @@ -6518,6 +6525,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( &pGlobalWorkOffset, &pGlobalWorkSize, &pLocalWorkSize, + &numKernelAlternatives, + &phKernelAlternatives, &numSyncPointsInWaitList, &pSyncPointWaitList, &pSyncPoint, @@ -6530,8 +6539,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_result_t result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, - phCommand); + pLocalWorkSize, numKernelAlternatives, phKernelAlternatives, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, phCommand); getContext()->notify_end( UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP, diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index e41623b15c..eb13922c9f 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8055,6 +8055,13 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + uint32_t + numKernelAlternatives, ///< [in] The number of kernel alternatives provided in + ///< pKernelAlternatives. + ur_kernel_handle_t * + phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels + ///< handles that might be used to update the kernel in this + ///< command after the command-buffer is finalized. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * @@ -8105,8 +8112,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_result_t result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, - phCommand); + pLocalWorkSize, numKernelAlternatives, phKernelAlternatives, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, phCommand); return result; } @@ -8931,6 +8938,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } + if (NULL != pUpdateKernelLaunch && NULL == pUpdateKernelLaunch->hNewKernel) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + if (NULL == pUpdateKernelLaunch) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 26f55c071f..5d4d2ee380 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -7106,6 +7106,13 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + uint32_t + numKernelAlternatives, ///< [in] The number of kernel alternatives provided in + ///< pKernelAlternatives. + ur_kernel_handle_t * + phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels + ///< handles that might be used to update the kernel in this + ///< command after the command-buffer is finalized. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * @@ -7138,11 +7145,20 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( // convert loader handle to platform handle hKernel = reinterpret_cast<ur_kernel_object_t *>(hKernel)->handle; + // convert loader handles to platform handles + auto phKernelAlternativesLocal = + std::vector<ur_kernel_handle_t>(numKernelAlternatives); + for (size_t i = 0; i < numKernelAlternatives; ++i) { + phKernelAlternativesLocal[i] = + reinterpret_cast<ur_kernel_object_t *>(phKernelAlternatives[i]) + ->handle; + } + // forward to device-platform result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, - phCommand); + pLocalWorkSize, numKernelAlternatives, phKernelAlternativesLocal.data(), + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, phCommand); if (UR_RESULT_SUCCESS != result) { return result; @@ -7872,6 +7888,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( // Deal with any struct parameters that have handle members we need to convert. auto pUpdateKernelLaunchLocal = *pUpdateKernelLaunch; + pUpdateKernelLaunchLocal.hNewKernel = + reinterpret_cast<ur_kernel_object_t *>( + pUpdateKernelLaunchLocal.hNewKernel) + ->handle; + std::vector<ur_exp_command_buffer_update_memobj_arg_desc_t> pUpdateKernelLaunchpNewMemObjArgList; for (uint32_t i = 0; i < pUpdateKernelLaunch->numNewMemObjArgs; i++) { diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 05b0c71995..a981d4f032 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7542,6 +7542,13 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + uint32_t + numKernelAlternatives, ///< [in] The number of kernel alternatives provided in + ///< pKernelAlternatives. 
+ ur_kernel_handle_t * + phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels + ///< handles that might be used to update the kernel in this + ///< command after the command-buffer is finalized. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * @@ -7559,10 +7566,10 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, - pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint, phCommand); + return pfnAppendKernelLaunchExp( + hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numKernelAlternatives, phKernelAlternatives, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, phCommand); } catch (...) { return exceptionToResult(std::current_exception()); } @@ -8294,6 +8301,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hCommand` +/// + `NULL == pUpdateKernelLaunch->hNewKernel` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pUpdateKernelLaunch` /// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE @@ -8301,10 +8309,10 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. -/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim used on creation of `hCommand`. 
-/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. -/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value when `hCommand` was created with a NULL local work size. -/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value when `hCommand` was created with a non-NULL local work size. +/// + If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size. +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size. 
/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 11b9dea7e9..7fbe274644 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -6398,6 +6398,13 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + uint32_t + numKernelAlternatives, ///< [in] The number of kernel alternatives provided in + ///< pKernelAlternatives. + ur_kernel_handle_t * + phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels + ///< handles that might be used to update the kernel in this + ///< command after the command-buffer is finalized. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * @@ -7007,6 +7014,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hCommand` +/// + `NULL == pUpdateKernelLaunch->hNewKernel` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pUpdateKernelLaunch` /// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE @@ -7014,10 +7022,10 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. -/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim used on creation of `hCommand`. 
-/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. -/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value when `hCommand` was created with a NULL local work size. -/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value when `hCommand` was created with a non-NULL local work size. +/// + If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size. +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size. 
/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX diff --git a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp index 78e1ffd009..531801c0ee 100644 --- a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp @@ -49,7 +49,8 @@ struct BufferFillCommandTest // Append kernel command to command-buffer and close command-buffer ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, + &command_handle)); ASSERT_NE(command_handle, nullptr); ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); @@ -123,6 +124,7 @@ TEST_P(BufferFillCommandTest, UpdateParameters) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 1, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -175,6 +177,7 @@ TEST_P(BufferFillCommandTest, UpdateGlobalSize) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 1, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -225,6 +228,7 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { ur_exp_command_buffer_update_kernel_launch_desc_t output_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 1, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -253,6 +257,7 @@ TEST_P(BufferFillCommandTest, 
SeparateUpdateCalls) { ur_exp_command_buffer_update_kernel_launch_desc_t input_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -271,6 +276,7 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { ur_exp_command_buffer_update_kernel_launch_desc_t global_size_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -315,6 +321,7 @@ TEST_P(BufferFillCommandTest, OverrideUpdate) { ur_exp_command_buffer_update_kernel_launch_desc_t first_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -342,6 +349,7 @@ TEST_P(BufferFillCommandTest, OverrideUpdate) { ur_exp_command_buffer_update_kernel_launch_desc_t second_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -398,6 +406,7 @@ TEST_P(BufferFillCommandTest, OverrideArgList) { ur_exp_command_buffer_update_kernel_launch_desc_t second_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 2, // numNewValueArgs diff --git a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp index 55e6773cb7..35cabe44f9 100644 --- a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp @@ -83,7 +83,8 @@ struct BufferSaxpyKernelTest // Append kernel command to 
command-buffer and close command-buffer ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, + &command_handle)); ASSERT_NE(command_handle, nullptr); ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); @@ -183,6 +184,7 @@ TEST_P(BufferSaxpyKernelTest, UpdateParameters) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 2, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs diff --git a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h index c144ac5fa2..d3e7e5f79e 100644 --- a/test/conformance/exp_command_buffer/fixtures.h +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -181,12 +181,14 @@ struct urCommandBufferCommandExpTest // Append 2 kernel commands to command-buffer and close command-buffer ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, + &command_handle)); ASSERT_NE(command_handle, nullptr); ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, nullptr, &command_handle_2)); + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, + &command_handle_2)); ASSERT_NE(command_handle_2, nullptr); ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); diff --git a/test/conformance/exp_command_buffer/invalid_update.cpp b/test/conformance/exp_command_buffer/invalid_update.cpp index afcb279fa9..78f76c2001 100644 --- 
a/test/conformance/exp_command_buffer/invalid_update.cpp +++ b/test/conformance/exp_command_buffer/invalid_update.cpp @@ -36,7 +36,8 @@ struct InvalidUpdateTest // Append kernel command to command-buffer ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, + &command_handle)); ASSERT_NE(command_handle, nullptr); } @@ -89,6 +90,7 @@ TEST_P(InvalidUpdateTest, NotFinalizedCommandBuffer) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -119,7 +121,7 @@ TEST_P(InvalidUpdateTest, NotUpdatableCommandBuffer) { ur_exp_command_buffer_command_handle_t test_command_handle = nullptr; EXPECT_SUCCESS(urCommandBufferAppendKernelLaunchExp( test_cmd_buf_handle, kernel, n_dimensions, &global_offset, &global_size, - &local_size, 0, nullptr, nullptr, &test_command_handle)); + &local_size, 0, nullptr, 0, nullptr, nullptr, &test_command_handle)); EXPECT_NE(test_command_handle, nullptr); EXPECT_SUCCESS(urCommandBufferFinalizeExp(test_cmd_buf_handle)); @@ -139,6 +141,7 @@ TEST_P(InvalidUpdateTest, NotUpdatableCommandBuffer) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -175,6 +178,7 @@ TEST_P(InvalidUpdateTest, GlobalLocalSizeMistach) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -200,7 
+204,8 @@ TEST_P(InvalidUpdateTest, ImplToUserDefinedLocalSize) { ur_exp_command_buffer_command_handle_t second_command_handle = nullptr; ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, nullptr, 0, nullptr, nullptr, &second_command_handle)); + &global_size, nullptr, 0, nullptr, 0, nullptr, nullptr, + &second_command_handle)); ASSERT_NE(second_command_handle, nullptr); EXPECT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); @@ -211,6 +216,7 @@ TEST_P(InvalidUpdateTest, ImplToUserDefinedLocalSize) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -243,6 +249,7 @@ TEST_P(InvalidUpdateTest, UserToImplDefinedLocalSize) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs diff --git a/test/conformance/exp_command_buffer/ndrange_update.cpp b/test/conformance/exp_command_buffer/ndrange_update.cpp index 3c053fe4b9..c107dfab5c 100644 --- a/test/conformance/exp_command_buffer/ndrange_update.cpp +++ b/test/conformance/exp_command_buffer/ndrange_update.cpp @@ -35,7 +35,7 @@ struct NDRangeUpdateTest ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, global_offset.data(), global_size.data(), local_size.data(), 0, - nullptr, nullptr, &command_handle)); + nullptr, 0, nullptr, nullptr, &command_handle)); ASSERT_NE(command_handle, nullptr); ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); @@ -128,6 +128,7 @@ TEST_P(NDRangeUpdateTest, Update3D) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { 
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -172,6 +173,7 @@ TEST_P(NDRangeUpdateTest, Update2D) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -216,6 +218,7 @@ TEST_P(NDRangeUpdateTest, Update1D) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -249,6 +252,7 @@ TEST_P(NDRangeUpdateTest, Invalid) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs diff --git a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp index 606744cd86..6a86f30cea 100644 --- a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp @@ -37,7 +37,8 @@ struct USMFillCommandTest // Append kernel command to command-buffer and close command-buffer ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, + &command_handle)); ASSERT_NE(command_handle, nullptr); ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); @@ -119,6 +120,7 @@ TEST_P(USMFillCommandTest, UpdateParameters) { 
ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs @@ -172,6 +174,7 @@ TEST_P(USMFillCommandTest, UpdateBeforeEnqueue) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs @@ -233,7 +236,7 @@ struct USMMultipleFillCommandTest // Append kernel and store returned handle ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &elements, &local_size, 0, nullptr, nullptr, + &elements, &local_size, 0, nullptr, 0, nullptr, nullptr, &command_handles[k])); ASSERT_NE(command_handles[k], nullptr); } @@ -323,6 +326,7 @@ TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs diff --git a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp index 0cb50cb3f1..ea32f7e046 100644 --- a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp @@ -81,7 +81,8 @@ struct USMSaxpyKernelTest : USMSaxpyKernelTestBase { // Append kernel command to command-buffer and close command-buffer ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, + 
&command_handle)); ASSERT_NE(command_handle, nullptr); ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); @@ -147,6 +148,7 @@ TEST_P(USMSaxpyKernelTest, UpdateParameters) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs @@ -181,7 +183,7 @@ struct USMMultiSaxpyKernelTest : USMSaxpyKernelTestBase { for (unsigned node = 0; node < nodes; node++) { ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, nullptr, + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, &command_handles[node])); ASSERT_NE(command_handles[node], nullptr); } @@ -252,6 +254,7 @@ TEST_P(USMMultiSaxpyKernelTest, UpdateParameters) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs @@ -317,6 +320,7 @@ TEST_P(USMMultiSaxpyKernelTest, UpdateWithoutBlocking) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext + kernel, //hNewKernel 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs diff --git a/test/conformance/exp_enqueue_native/CMakeLists.txt b/test/conformance/exp_enqueue_native/CMakeLists.txt index 8638fa1349..704c84470e 100644 --- a/test/conformance/exp_enqueue_native/CMakeLists.txt +++ b/test/conformance/exp_enqueue_native/CMakeLists.txt @@ -5,14 +5,31 @@ if (UR_BUILD_ADAPTER_CUDA) add_conformance_test_with_kernels_environment( - exp_enqueue_native + exp_enqueue_native_cuda enqueue_native_cuda.cpp ) - target_include_directories(test-exp_enqueue_native 
PRIVATE + target_include_directories(test-exp_enqueue_native_cuda PRIVATE ${PROJECT_SOURCE_DIR}/source ${PROJECT_SOURCE_DIR}/source/adapters/cuda ) - target_link_libraries(test-exp_enqueue_native PRIVATE cudadrv) + target_link_libraries(test-exp_enqueue_native_cuda PRIVATE cudadrv) +endif() + +if (UR_BUILD_ADAPTER_L0) + add_conformance_test_with_kernels_environment( + exp_enqueue_native_l0 + enqueue_native_level_zero.cpp + ) + target_link_libraries(test-exp_enqueue_native_l0 PRIVATE + LevelZeroLoader + LevelZeroLoader-Headers + ) + + target_include_directories(test-exp_enqueue_native_l0 PRIVATE + ${PROJECT_SOURCE_DIR}/source + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero + LevelZeroLoader-Headers + ) endif() # TODO: Add more tests for different triples From f17c954fc917f9dc5aa28a31ffeeb75a43b621d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 5 Aug 2024 18:54:06 +0100 Subject: [PATCH 02/14] Add kernel update to command-buffer documentation --- scripts/core/EXP-COMMAND-BUFFER.rst | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index 7c03ba3211..387b77a34c 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -173,7 +173,14 @@ ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP query. Updating kernel commands is done by passing the new kernel configuration to ${x}CommandBufferUpdateKernelLaunchExp along with the command handle of the kernel command to update. Configurations that can be changed are the -parameters to the kernel and the execution ND-Range. +kernel handle, the parameters to the kernel and the execution ND-Range. + +To update the kernel handle, it is necessary to first register the new +kernel handle before the command-buffer is finalized. This can be done +using the ``phKernelAlternatives`` parameter of +${x}CommandBufferUpdateKernelLaunchExp. 
The command-buffer can then +be updated to use the new kernel handle by passing it to +${x}CommandBufferUpdateKernelLaunchExp. .. parsed-literal:: @@ -187,12 +194,14 @@ parameters to the kernel and the execution ND-Range. ${x}CommandBufferCreateExp(hContext, hDevice, &desc, &hCommandBuffer); // Append a kernel command which has two buffer parameters, an input - // and an output. + // and an output. Register hNewKernel as an alternative kernel handle + // which can later be used to change the kernel handle associated + // with this command. ${x}_exp_command_buffer_command_handle_t hCommand; ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, 0, nullptr, 0, nullptr, - nullptr, &hCommand); + pLocalWorkSize, 1, &hNewKernel, + 0, nullptr, nullptr, &hCommand); // Close the command-buffer before updating ${x}CommandBufferFinalizeExp(hCommandBuffer); @@ -220,7 +229,7 @@ parameters to the kernel and the execution ND-Range. 
${x}_exp_command_buffer_update_kernel_launch_desc_t update { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - hKernel //hNewKernel + hNewKernel, // hNewKernel 2, // numNewMemobjArgs 0, // numNewPointerArgs 0, // numNewValueArgs From 2c46496683251c4cf9533561891053aca7c0e062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 5 Aug 2024 19:04:12 +0100 Subject: [PATCH 03/14] Update changelog --- scripts/core/EXP-COMMAND-BUFFER.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index 387b77a34c..daee3531a8 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -350,6 +350,8 @@ Changelog +-----------+-------------------------------------------------------+ | 1.4 | Add function definitions for kernel command update | +-----------+-------------------------------------------------------+ +| 1.5 | Add support for updating kernel handles. 
| ++-----------+-------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- From f37046dbb9dff037a37dd529ecba45b462dcdfca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 5 Aug 2024 19:11:22 +0100 Subject: [PATCH 04/14] update contributor list --- scripts/core/EXP-COMMAND-BUFFER.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index daee3531a8..b5e2d643de 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -360,3 +360,4 @@ Contributors * Ewan Crawford `ewan@codeplay.com `_ * Maxime France-Pillois `maxime.francepillois@codeplay.com `_ * Aaron Greig `aaron.greig@codeplay.com `_ +* Fábio Mestre `fabio.mestre@codeplay.com `_ From bbd26586c533855f2cd9bf75ae5cb384cc5d41cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio?= Date: Wed, 7 Aug 2024 10:58:30 +0100 Subject: [PATCH 05/14] Update scripts/core/exp-command-buffer.yml Co-authored-by: Ewan Crawford --- scripts/core/exp-command-buffer.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index f097270b4a..2eabb5e782 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -314,7 +314,7 @@ params: desc: "[in][optional] Local work size to use when executing kernel." - type: uint32_t name: "numKernelAlternatives" - desc: "[in] The number of kernel alternatives provided in pKernelAlternatives." + desc: "[in] The number of kernel alternatives provided in phKernelAlternatives." 
- type: $x_kernel_handle_t* name: "phKernelAlternatives" desc: | From 85eb8067c9cac4c1e156792a091dbc6418e0303f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Thu, 8 Aug 2024 14:15:54 +0100 Subject: [PATCH 06/14] Address review comments --- scripts/core/EXP-COMMAND-BUFFER.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index b5e2d643de..94df623481 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -175,11 +175,11 @@ to ${x}CommandBufferUpdateKernelLaunchExp along with the command handle of the kernel command to update. Configurations that can be changed are the kernel handle, the parameters to the kernel and the execution ND-Range. -To update the kernel handle, it is necessary to first register the new -kernel handle before the command-buffer is finalized. This can be done +Kernel handles that might be used to update the kernel of a command need +to be registered when the command is created. This can be done using the ``phKernelAlternatives`` parameter of -${x}CommandBufferUpdateKernelLaunchExp. The command-buffer can then -be updated to use the new kernel handle by passing it to +${x}CommandBufferAppendKernelLaunchExp. The command can then be updated +to use the new kernel handle by passing it to ${x}CommandBufferUpdateKernelLaunchExp. .. 
parsed-literal:: From 1416158aa7d4e64e869bcb0bf3ccf3d35b0d81be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 2 Sep 2024 16:10:16 +0100 Subject: [PATCH 07/14] Add testing for binary update --- test/conformance/device_code/CMakeLists.txt | 1 + test/conformance/device_code/fill_usm_2d.cpp | 30 ++ .../exp_command_buffer/CMakeLists.txt | 13 +- .../exp_command_buffer/commands.cpp | 272 ++++++------- .../conformance/exp_command_buffer/fixtures.h | 323 ++++++++-------- .../buffer_fill_kernel_update.cpp | 2 +- .../buffer_saxpy_kernel_update.cpp | 2 +- .../{ => update}/invalid_update.cpp | 2 +- .../update/kernel_handle_update.cpp | 291 ++++++++++++++ .../{ => update}/ndrange_update.cpp | 2 +- .../update/usm_fill_kernel_update.cpp | 357 ++++++++++++++++++ .../update/usm_saxpy_kernel_update.cpp | 354 +++++++++++++++++ .../exp_enqueue_native/CMakeLists.txt | 29 +- 13 files changed, 1352 insertions(+), 326 deletions(-) create mode 100644 test/conformance/device_code/fill_usm_2d.cpp rename test/conformance/exp_command_buffer/{ => update}/buffer_fill_kernel_update.cpp (99%) rename test/conformance/exp_command_buffer/{ => update}/buffer_saxpy_kernel_update.cpp (99%) rename test/conformance/exp_command_buffer/{ => update}/invalid_update.cpp (99%) create mode 100644 test/conformance/exp_command_buffer/update/kernel_handle_update.cpp rename test/conformance/exp_command_buffer/{ => update}/ndrange_update.cpp (99%) create mode 100644 test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp create mode 100644 test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp diff --git a/test/conformance/device_code/CMakeLists.txt b/test/conformance/device_code/CMakeLists.txt index 5445531961..912402b7a5 100644 --- a/test/conformance/device_code/CMakeLists.txt +++ b/test/conformance/device_code/CMakeLists.txt @@ -141,6 +141,7 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_2d.cpp) 
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_3d.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_usm.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_usm_2d.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/foo.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/image_copy.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/inc.cpp) diff --git a/test/conformance/device_code/fill_usm_2d.cpp b/test/conformance/device_code/fill_usm_2d.cpp new file mode 100644 index 0000000000..76fc0ae7ea --- /dev/null +++ b/test/conformance/device_code/fill_usm_2d.cpp @@ -0,0 +1,30 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +int main() { + + size_t nd_range_x = 8; + size_t nd_range_y = 8; + + auto nd_range = sycl::range<2>(nd_range_x, nd_range_y); + + std::vector A(nd_range_x * nd_range_y, 1); + uint32_t val = 42; + sycl::queue sycl_queue; + + auto work_range = sycl::nd_range<2>(nd_range, sycl::range<2>(1, 1)); + + uint32_t *data = sycl::malloc_shared(nd_range_x * nd_range_y, sycl_queue); + sycl_queue.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + work_range, [data, val](sycl::nd_item<2> item_id) { + auto id = item_id.get_global_linear_id(); + data[id] = val; + }); + }); + return 0; +} diff --git a/test/conformance/exp_command_buffer/CMakeLists.txt b/test/conformance/exp_command_buffer/CMakeLists.txt index a28d692d9b..0162a2dfe3 100644 --- a/test/conformance/exp_command_buffer/CMakeLists.txt +++ b/test/conformance/exp_command_buffer/CMakeLists.txt @@ -4,14 +4,15 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception add_conformance_test_with_kernels_environment(exp_command_buffer - buffer_fill_kernel_update.cpp - usm_fill_kernel_update.cpp - buffer_saxpy_kernel_update.cpp - usm_saxpy_kernel_update.cpp - ndrange_update.cpp release.cpp retain.cpp - invalid_update.cpp 
commands.cpp fill.cpp + update/buffer_fill_kernel_update.cpp + update/invalid_update.cpp + update/kernel_handle_update.cpp + update/usm_fill_kernel_update.cpp + update/buffer_saxpy_kernel_update.cpp + update/ndrange_update.cpp + update/usm_saxpy_kernel_update.cpp ) diff --git a/test/conformance/exp_command_buffer/commands.cpp b/test/conformance/exp_command_buffer/commands.cpp index 412e4ab6de..4066a1d3a2 100644 --- a/test/conformance/exp_command_buffer/commands.cpp +++ b/test/conformance/exp_command_buffer/commands.cpp @@ -8,197 +8,197 @@ struct urCommandBufferCommandsTest : uur::command_buffer::urCommandBufferExpTest { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE( - uur::command_buffer::urCommandBufferExpTest::SetUp()); - - // Allocate USM pointers - for (auto &device_ptr : device_ptrs) { - ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr, - allocation_size, &device_ptr)); - ASSERT_NE(device_ptr, nullptr); - } - - for (auto &buffer : buffers) { - ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, - allocation_size, nullptr, - &buffer)); - - ASSERT_NE(buffer, nullptr); - } + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE( + uur::command_buffer::urCommandBufferExpTest::SetUp()); + + // Allocate USM pointers + for (auto &device_ptr : device_ptrs) { + ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr, + allocation_size, &device_ptr)); + ASSERT_NE(device_ptr, nullptr); } - void TearDown() override { - for (auto &device_ptr : device_ptrs) { - if (device_ptr) { - EXPECT_SUCCESS(urUSMFree(context, device_ptr)); - } - } - - for (auto &buffer : buffers) { - if (buffer) { - EXPECT_SUCCESS(urMemRelease(buffer)); - } - } - - UUR_RETURN_ON_FATAL_FAILURE( - uur::command_buffer::urCommandBufferExpTest::TearDown()); + for (auto &buffer : buffers) { + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + allocation_size, nullptr, + &buffer)); + + ASSERT_NE(buffer, nullptr); + } + } + + void TearDown() 
override { + for (auto &device_ptr : device_ptrs) { + if (device_ptr) { + EXPECT_SUCCESS(urUSMFree(context, device_ptr)); + } } - static constexpr unsigned elements = 16; - static constexpr size_t allocation_size = elements * sizeof(uint32_t); + for (auto &buffer : buffers) { + if (buffer) { + EXPECT_SUCCESS(urMemRelease(buffer)); + } + } - std::array device_ptrs = {nullptr, nullptr}; - std::array buffers = {nullptr, nullptr}; + UUR_RETURN_ON_FATAL_FAILURE( + uur::command_buffer::urCommandBufferExpTest::TearDown()); + } + + static constexpr unsigned elements = 16; + static constexpr size_t allocation_size = elements * sizeof(uint32_t); + + std::array device_ptrs = {nullptr, nullptr}; + std::array buffers = {nullptr, nullptr}; }; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferCommandsTest); TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendUSMMemcpyExp) { - ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( - cmd_buf_handle, device_ptrs[0], device_ptrs[1], allocation_size, 0, - nullptr, nullptr)); + ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( + cmd_buf_handle, device_ptrs[0], device_ptrs[1], allocation_size, 0, + nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendUSMFillExp) { - uint32_t pattern = 42; - ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( - cmd_buf_handle, device_ptrs[0], &pattern, sizeof(pattern), - allocation_size, 0, nullptr, nullptr)); + uint32_t pattern = 42; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + cmd_buf_handle, device_ptrs[0], &pattern, sizeof(pattern), + allocation_size, 0, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferCopyExp) { - ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyExp( - cmd_buf_handle, buffers[0], buffers[1], 0, 0, allocation_size, 0, - nullptr, nullptr)); + ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyExp( + cmd_buf_handle, buffers[0], buffers[1], 0, 0, allocation_size, 0, + nullptr, nullptr)); } 
TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferCopyRectExp) { - ur_rect_offset_t origin{0, 0, 0}; - ur_rect_region_t region{4, 4, 1}; - ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyRectExp( - cmd_buf_handle, buffers[0], buffers[1], origin, origin, region, 4, 16, - 4, 16, 0, nullptr, nullptr)); + ur_rect_offset_t origin{0, 0, 0}; + ur_rect_region_t region{4, 4, 1}; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyRectExp( + cmd_buf_handle, buffers[0], buffers[1], origin, origin, region, 4, 16, + 4, 16, 0, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferReadExp) { - std::array host_data{}; - ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadExp( - cmd_buf_handle, buffers[0], 0, allocation_size, host_data.data(), 0, - nullptr, nullptr)); + std::array host_data{}; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadExp( + cmd_buf_handle, buffers[0], 0, allocation_size, host_data.data(), 0, + nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferReadRectExp) { - std::array host_data{}; - ur_rect_offset_t origin{0, 0, 0}; - ur_rect_region_t region{4, 4, 1}; - ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadRectExp( - cmd_buf_handle, buffers[0], origin, origin, region, 4, 16, 4, 16, - host_data.data(), 0, nullptr, nullptr)); + std::array host_data{}; + ur_rect_offset_t origin{0, 0, 0}; + ur_rect_region_t region{4, 4, 1}; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadRectExp( + cmd_buf_handle, buffers[0], origin, origin, region, 4, 16, 4, 16, + host_data.data(), 0, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferWriteExp) { - std::array host_data{}; - ASSERT_SUCCESS(urCommandBufferAppendMemBufferWriteExp( - cmd_buf_handle, buffers[0], 0, allocation_size, host_data.data(), 0, - nullptr, nullptr)); + std::array host_data{}; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferWriteExp( + cmd_buf_handle, buffers[0], 0, allocation_size, 
host_data.data(), 0, + nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferWriteRectExp) { - std::array host_data{}; - ur_rect_offset_t origin{0, 0, 0}; - ur_rect_region_t region{4, 4, 1}; - ASSERT_SUCCESS(urCommandBufferAppendMemBufferWriteRectExp( - cmd_buf_handle, buffers[0], origin, origin, region, 4, 16, 4, 16, - host_data.data(), 0, nullptr, nullptr)); + std::array host_data{}; + ur_rect_offset_t origin{0, 0, 0}; + ur_rect_region_t region{4, 4, 1}; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferWriteRectExp( + cmd_buf_handle, buffers[0], origin, origin, region, 4, 16, 4, 16, + host_data.data(), 0, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferFillExp) { - uint32_t pattern = 42; - ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp( - cmd_buf_handle, buffers[0], &pattern, sizeof(pattern), 0, - allocation_size, 0, nullptr, nullptr)); + uint32_t pattern = 42; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp( + cmd_buf_handle, buffers[0], &pattern, sizeof(pattern), 0, + allocation_size, 0, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendUSMPrefetchExp) { - ASSERT_SUCCESS(urCommandBufferAppendUSMPrefetchExp( - cmd_buf_handle, device_ptrs[0], allocation_size, 0, 0, nullptr, - nullptr)); + ASSERT_SUCCESS(urCommandBufferAppendUSMPrefetchExp( + cmd_buf_handle, device_ptrs[0], allocation_size, 0, 0, nullptr, + nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendUSMAdviseExp) { - ASSERT_SUCCESS(urCommandBufferAppendUSMAdviseExp( - cmd_buf_handle, device_ptrs[0], allocation_size, 0, 0, nullptr, - nullptr)); + ASSERT_SUCCESS(urCommandBufferAppendUSMAdviseExp( + cmd_buf_handle, device_ptrs[0], allocation_size, 0, 0, nullptr, + nullptr)); } struct urCommandBufferAppendKernelLaunchExpTest : uur::command_buffer::urCommandBufferExpExecutionTest { - virtual void SetUp() override { - program_name = "saxpy_usm"; - 
UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest::SetUp()); - for (auto &shared_ptr : shared_ptrs) { - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - allocation_size, &shared_ptr)); - ASSERT_NE(shared_ptr, nullptr); - } - - int32_t *ptrX = static_cast(shared_ptrs[1]); - int32_t *ptrY = static_cast(shared_ptrs[2]); - for (size_t i = 0; i < global_size; i++) { - ptrX[i] = i; - ptrY[i] = i * 2; - } - - // Index 0 is output - ASSERT_SUCCESS( - urKernelSetArgPointer(kernel, 0, nullptr, shared_ptrs[0])); - // Index 1 is A - ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(A), nullptr, &A)); - // Index 2 is X - ASSERT_SUCCESS( - urKernelSetArgPointer(kernel, 2, nullptr, shared_ptrs[1])); - // Index 3 is Y - ASSERT_SUCCESS( - urKernelSetArgPointer(kernel, 3, nullptr, shared_ptrs[2])); + virtual void SetUp() override { + program_name = "saxpy_usm"; + UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest::SetUp()); + for (auto &shared_ptr : shared_ptrs) { + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); } - virtual void TearDown() override { - for (auto &shared_ptr : shared_ptrs) { - if (shared_ptr) { - EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); - } - } + int32_t *ptrX = static_cast(shared_ptrs[1]); + int32_t *ptrY = static_cast(shared_ptrs[2]); + for (size_t i = 0; i < global_size; i++) { + ptrX[i] = i; + ptrY[i] = i * 2; + } - UUR_RETURN_ON_FATAL_FAILURE( - urCommandBufferExpExecutionTest::TearDown()); + // Index 0 is output + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 0, nullptr, shared_ptrs[0])); + // Index 1 is A + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(A), nullptr, &A)); + // Index 2 is X + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 2, nullptr, shared_ptrs[1])); + // Index 3 is Y + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 3, nullptr, shared_ptrs[2])); + } + + virtual void TearDown() override { + for (auto 
&shared_ptr : shared_ptrs) { + if (shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); + } } - static constexpr size_t local_size = 4; - static constexpr size_t global_size = 32; - static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; - static constexpr size_t allocation_size = sizeof(uint32_t) * global_size; - static constexpr uint32_t A = 42; - std::array shared_ptrs = {nullptr, nullptr, nullptr}; + UUR_RETURN_ON_FATAL_FAILURE( + urCommandBufferExpExecutionTest::TearDown()); + } + + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr size_t allocation_size = sizeof(uint32_t) * global_size; + static constexpr uint32_t A = 42; + std::array shared_ptrs = {nullptr, nullptr, nullptr}; }; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferAppendKernelLaunchExpTest); TEST_P(urCommandBufferAppendKernelLaunchExpTest, Basic) { - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - cmd_buf_handle, kernel, n_dimensions, &global_offset, &global_size, - &local_size, 0, nullptr, nullptr, nullptr)); + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + cmd_buf_handle, kernel, n_dimensions, &global_offset, &global_size, + &local_size, 0, nullptr, 0, nullptr, nullptr, nullptr)); - ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); - ASSERT_SUCCESS( - urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); - int32_t *ptrZ = static_cast(shared_ptrs[0]); - for (size_t i = 0; i < global_size; i++) { - uint32_t result = (A * i) + (i * 2); - ASSERT_EQ(result, ptrZ[i]); - } + int32_t *ptrZ = static_cast(shared_ptrs[0]); + for (size_t i = 0; i < 
global_size; i++) { + uint32_t result = (A * i) + (i * 2); + ASSERT_EQ(result, ptrZ[i]); + } } diff --git a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h index d3e7e5f79e..2d2d343d31 100644 --- a/test/conformance/exp_command_buffer/fixtures.h +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -11,150 +11,159 @@ namespace uur { namespace command_buffer { -struct urCommandBufferExpTest : uur::urContextTest { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE(uur::urContextTest::SetUp()); - - size_t returned_size; - ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0, - nullptr, &returned_size)); - - std::unique_ptr returned_extensions(new char[returned_size]); +static void checkCommandBufferSupport(ur_device_handle_t device) { + size_t returned_size; + ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0, + nullptr, &returned_size)); + + std::unique_ptr returned_extensions(new char[returned_size]); + + ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, + returned_size, returned_extensions.get(), + nullptr)); + + std::string_view extensions_string(returned_extensions.get()); + bool command_buffer_support = + extensions_string.find(UR_COMMAND_BUFFER_EXTENSION_STRING_EXP) != + std::string::npos; + + if (!command_buffer_support) { + GTEST_SKIP() << "EXP command-buffer feature is not supported."; + } +} + +static void checkCommandBufferUpdateSupport(ur_device_handle_t device) { + bool updatable_command_buffer_support; + ASSERT_SUCCESS(urDeviceGetInfo( + device, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP, + sizeof(ur_bool_t), &updatable_command_buffer_support, nullptr)); + + if (!updatable_command_buffer_support) { + GTEST_SKIP() << "Updating EXP command-buffers is not supported."; + } +} - ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, - returned_size, returned_extensions.get(), - nullptr)); - - std::string_view 
extensions_string(returned_extensions.get()); - bool command_buffer_support = - extensions_string.find(UR_COMMAND_BUFFER_EXTENSION_STRING_EXP) != - std::string::npos; - - if (!command_buffer_support) { - GTEST_SKIP() << "EXP command-buffer feature is not supported."; - } +struct urCommandBufferExpTest : uur::urContextTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urContextTest::SetUp()); + + ASSERT_NO_FATAL_FAILURE(checkCommandBufferSupport(device)); + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, + &cmd_buf_handle)); + ASSERT_NE(cmd_buf_handle, nullptr); + } + + void TearDown() override { + if (cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); + } + UUR_RETURN_ON_FATAL_FAILURE(uur::urContextTest::TearDown()); + } - ASSERT_SUCCESS(urDeviceGetInfo( - device, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP, - sizeof(ur_bool_t), &updatable_command_buffer_support, nullptr)); + ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; +}; - // Create a command-buffer - ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, - &cmd_buf_handle)); - ASSERT_NE(cmd_buf_handle, nullptr); +struct urUpdatableCommandBufferExpTest : uur::urQueueTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::SetUp()); + + ASSERT_NO_FATAL_FAILURE(checkCommandBufferSupport(device)); + + ASSERT_NO_FATAL_FAILURE(checkCommandBufferUpdateSupport(device)); + + // Create a command-buffer with update enabled. + ur_exp_command_buffer_desc_t desc{ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true}; + + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc, + &updatable_cmd_buf_handle)); + ASSERT_NE(updatable_cmd_buf_handle, nullptr); + + // Currently there are synchronization issue with immediate submission when used for command buffers. + // So, create queue with batched submission for this test suite if the backend is Level Zero. 
+ if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { // TODO Remove this workaround + ur_queue_flags_t flags = UR_QUEUE_FLAG_SUBMISSION_BATCHED; + ur_queue_properties_t props = { + /*.stype =*/UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, + /*.pNext =*/nullptr, + /*.flags =*/flags, + }; + ASSERT_SUCCESS(urQueueCreate(context, device, &props, &queue)); + ASSERT_NE(queue, nullptr); + } else { + queue = urQueueTest::queue; } + } - void TearDown() override { - if (cmd_buf_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); - } - UUR_RETURN_ON_FATAL_FAILURE(uur::urContextTest::TearDown()); + void TearDown() override { + if (updatable_cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle)); + } + if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO && queue) { + ASSERT_SUCCESS(urQueueRelease(queue)); } - ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; - ur_bool_t updatable_command_buffer_support = false; + UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::TearDown()); + } + + ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr; + ur_queue_handle_t queue = nullptr; + ur_platform_backend_t backend{}; }; -template +template struct urCommandBufferExpTestWithParam : urQueueTestWithParam { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTestWithParam::SetUp()); - - size_t returned_size; - ASSERT_SUCCESS(urDeviceGetInfo(this->device, UR_DEVICE_INFO_EXTENSIONS, - 0, nullptr, &returned_size)); - - std::unique_ptr returned_extensions(new char[returned_size]); - - ASSERT_SUCCESS(urDeviceGetInfo(this->device, UR_DEVICE_INFO_EXTENSIONS, - returned_size, returned_extensions.get(), - nullptr)); - - std::string_view extensions_string(returned_extensions.get()); - bool command_buffer_support = - extensions_string.find(UR_COMMAND_BUFFER_EXTENSION_STRING_EXP) != - std::string::npos; - - if (!command_buffer_support) { - GTEST_SKIP() << "EXP command-buffer feature is not supported."; - } - - // Create a command-buffer - 
ASSERT_SUCCESS(urCommandBufferCreateExp(this->context, this->device, - nullptr, &cmd_buf_handle)); - ASSERT_NE(cmd_buf_handle, nullptr); + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTestWithParam::SetUp()); + + ASSERT_NO_FATAL_FAILURE(checkCommandBufferSupport(this->device)); + ASSERT_SUCCESS(urCommandBufferCreateExp(this->context, this->device, + nullptr, &cmd_buf_handle)); + ASSERT_NE(cmd_buf_handle, nullptr); + } + + void TearDown() override { + if (cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); } + UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTestWithParam::TearDown()); + } - void TearDown() override { - if (cmd_buf_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); - } - UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTestWithParam::TearDown()); - } - - ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; + ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; }; struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp()); - - ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, - sizeof(backend), &backend, nullptr)); - - size_t returned_size; - ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0, - nullptr, &returned_size)); - - std::unique_ptr returned_extensions(new char[returned_size]); - - ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, - returned_size, returned_extensions.get(), - nullptr)); - - std::string_view extensions_string(returned_extensions.get()); - bool command_buffer_support = - extensions_string.find(UR_COMMAND_BUFFER_EXTENSION_STRING_EXP) != - std::string::npos; - - if (!command_buffer_support) { - GTEST_SKIP() << "EXP command-buffer feature is not supported."; - } - - ASSERT_SUCCESS(urDeviceGetInfo( - device, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP, - sizeof(ur_bool_t), &updatable_command_buffer_support, 
nullptr)); - - // Create a command-buffer - ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, - &cmd_buf_handle)); - ASSERT_NE(cmd_buf_handle, nullptr); + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp()); + + ASSERT_NO_FATAL_FAILURE(checkCommandBufferSupport(device)); + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, + &cmd_buf_handle)); + ASSERT_NE(cmd_buf_handle, nullptr); + } + + void TearDown() override { + if (cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); } + UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::TearDown()); + } - void TearDown() override { - if (cmd_buf_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); - } - UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::TearDown()); - } + ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; - ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; - ur_bool_t updatable_command_buffer_support = false; - ur_platform_backend_t backend{}; }; struct urUpdatableCommandBufferExpExecutionTest - : urCommandBufferExpExecutionTest { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest ::SetUp()); + : uur::urKernelExecutionTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp()); - if (!updatable_command_buffer_support) { - GTEST_SKIP() << "Updating EXP command-buffers is not supported."; - } + ASSERT_NO_FATAL_FAILURE(checkCommandBufferSupport(device)); + ASSERT_NO_FATAL_FAILURE(checkCommandBufferUpdateSupport(device)); - // Create a command-buffer with update enabled. - ur_exp_command_buffer_desc_t desc{ - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true}; + // Create a command-buffer with update enabled. 
+ ur_exp_command_buffer_desc_t desc{ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true}; ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc, &updatable_cmd_buf_handle)); @@ -166,7 +175,7 @@ struct urUpdatableCommandBufferExpExecutionTest EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle)); } UUR_RETURN_ON_FATAL_FAILURE( - urCommandBufferExpExecutionTest::TearDown()); + urKernelExecutionTest::TearDown()); } ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr; @@ -174,46 +183,46 @@ struct urUpdatableCommandBufferExpExecutionTest struct urCommandBufferCommandExpTest : urUpdatableCommandBufferExpExecutionTest { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpExecutionTest::SetUp()); - - // Append 2 kernel commands to command-buffer and close command-buffer - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, - &command_handle)); - ASSERT_NE(command_handle, nullptr); - - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, - &command_handle_2)); - ASSERT_NE(command_handle_2, nullptr); - - ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::SetUp()); + + // Append 2 kernel commands to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, + &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, 
nullptr, 0, nullptr, nullptr, + &command_handle_2)); + ASSERT_NE(command_handle_2, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void TearDown() override { + if (command_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); } - void TearDown() override { - if (command_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); - } - - if (command_handle_2) { - EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle_2)); - } - - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpExecutionTest::TearDown()); + if (command_handle_2) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle_2)); } - static constexpr size_t local_size = 4; - static constexpr size_t global_size = 32; - static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::TearDown()); + } + + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; - ur_exp_command_buffer_command_handle_t command_handle = nullptr; - ur_exp_command_buffer_command_handle_t command_handle_2 = nullptr; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; + ur_exp_command_buffer_command_handle_t command_handle_2 = nullptr; }; } // namespace command_buffer } // namespace uur diff --git a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp similarity index 99% rename from test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp rename to test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp index 531801c0ee..a12a7903a3 100644 --- a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp +++ 
b/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp @@ -3,7 +3,7 @@ // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "fixtures.h" +#include "../fixtures.h" // Test that updating a command-buffer with a single kernel command // taking USM arguments works correctly. diff --git a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp similarity index 99% rename from test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp rename to test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp index 35cabe44f9..d33ba3a563 100644 --- a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp @@ -3,7 +3,7 @@ // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "fixtures.h" +#include "../fixtures.h" // Test that updating a command-buffer with a single kernel command // taking buffer & scalar arguments works correctly. diff --git a/test/conformance/exp_command_buffer/invalid_update.cpp b/test/conformance/exp_command_buffer/update/invalid_update.cpp similarity index 99% rename from test/conformance/exp_command_buffer/invalid_update.cpp rename to test/conformance/exp_command_buffer/update/invalid_update.cpp index 78f76c2001..fd9a46c2aa 100644 --- a/test/conformance/exp_command_buffer/invalid_update.cpp +++ b/test/conformance/exp_command_buffer/update/invalid_update.cpp @@ -3,7 +3,7 @@ // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "fixtures.h" +#include "../fixtures.h" #include // Negative tests that correct error codes are thrown on invalid update usage. 
diff --git a/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp b/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp new file mode 100644 index 0000000000..560bf23701 --- /dev/null +++ b/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp @@ -0,0 +1,291 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "../fixtures.h" +#include + +//TODO + +struct TestKernel { + + TestKernel(std::string Name, ur_platform_handle_t Platform, ur_context_handle_t Context, ur_device_handle_t Device) + : Name(std::move(Name)), Platform(Platform), Context(Context), Device(Device) { + + } + + virtual ~TestKernel() = default; + + virtual void BuildKernel() { + + std::shared_ptr> ILBinary; + std::vector Metadatas{}; + + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(Platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + + ASSERT_NO_FATAL_FAILURE( + uur::KernelsEnvironment::instance->LoadSource(Name, + ILBinary)); + + const ur_program_properties_t properties = { + UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES, nullptr, + static_cast(Metadatas.size()), + Metadatas.empty() ? 
nullptr : Metadatas.data()}; + ASSERT_SUCCESS(uur::KernelsEnvironment::instance->CreateProgram( + Platform, Context, Device, *ILBinary, &properties, &Program)); + + auto KernelNames = + uur::KernelsEnvironment::instance->GetEntryPointNames(Name); + std::string KernelName = KernelNames[0]; + ASSERT_FALSE(KernelName.empty()); + + ASSERT_SUCCESS(urProgramBuild(Context, Program, nullptr)); + ASSERT_SUCCESS(urKernelCreate(Program, KernelName.data(), &Kernel)); + } + + virtual void SetUpKernel() = 0; + + virtual void DestroyKernel() { + ASSERT_SUCCESS(urKernelRelease(Kernel)); + ASSERT_SUCCESS(urProgramRelease(Program)); + }; + + virtual void Validate() = 0; + + std::string Name; + ur_platform_handle_t Platform; + ur_context_handle_t Context; + ur_device_handle_t Device; + ur_program_handle_t Program; + ur_kernel_handle_t Kernel; + +}; + +struct TestSaxpyKernel : public TestKernel { + + TestSaxpyKernel(ur_platform_handle_t Platform, ur_context_handle_t Context, ur_device_handle_t Device) + : TestKernel("saxpy_usm", Platform, Context, + Device) {} + + ~TestSaxpyKernel() override = default; + + void SetUpKernel() override { + + ASSERT_NO_FATAL_FAILURE(BuildKernel()); + + const size_t allocation_size = sizeof(uint32_t) * global_size; + for (auto &shared_ptr : shared_ptrs) { + ASSERT_SUCCESS(urUSMSharedAlloc(Context, Device, nullptr, nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); + + std::vector pattern(allocation_size); + uur::generateMemFillPattern(pattern); + std::memcpy(shared_ptr, pattern.data(), allocation_size); + } + + // Index 0 is output + ASSERT_SUCCESS( + urKernelSetArgPointer(Kernel, 0, nullptr, shared_ptrs[0])); + // Index 1 is A + ASSERT_SUCCESS(urKernelSetArgValue(Kernel, 1, sizeof(A), nullptr, &A)); + // Index 2 is X + ASSERT_SUCCESS( + urKernelSetArgPointer(Kernel, 2, nullptr, shared_ptrs[1])); + // Index 3 is Y + ASSERT_SUCCESS( + urKernelSetArgPointer(Kernel, 3, nullptr, shared_ptrs[2])); + } + + void DestroyKernel() override 
{ + for (auto &shared_ptr : shared_ptrs) { + if (shared_ptr) { + EXPECT_SUCCESS(urUSMFree(Context, shared_ptr)); + } + } + ASSERT_NO_FATAL_FAILURE(TestKernel::DestroyKernel()); + } + + void Validate() override { + // TODO Test that no fatal failure works when the validation fails + for (size_t i = 0; i < global_size; i++) { + uint32_t result = A * X[i] + Y[i]; + ASSERT_EQ(result, output[i]); + } + } + + const size_t local_size = 4; + const size_t global_size = 32; + const size_t global_offset = 0; + const size_t n_dimensions = 1; + const uint32_t A = 42; + + std::array shared_ptrs = {nullptr, nullptr, nullptr, nullptr}; + uint32_t *output = (uint32_t *) shared_ptrs[0]; + uint32_t *X = (uint32_t *) shared_ptrs[1]; + uint32_t *Y = (uint32_t *) shared_ptrs[2]; +}; + +struct TestFill2DKernel : public TestKernel { + + TestFill2DKernel(ur_platform_handle_t Platform, ur_context_handle_t Context, ur_device_handle_t Device) + : TestKernel("fill_usm_2d", Platform, Context, + Device) {} + + ~TestFill2DKernel() override = default; + + void SetUpKernel() override { + ASSERT_NO_FATAL_FAILURE(BuildKernel()); + + const size_t allocation_size = sizeof(uint32_t) * global_size; + ASSERT_SUCCESS(urUSMSharedAlloc(Context, Device, nullptr, nullptr, + allocation_size, &Memory)); + ASSERT_NE(Memory, nullptr); + + std::vector pattern(allocation_size); + uur::generateMemFillPattern(pattern); + std::memcpy(Memory, pattern.data(), allocation_size); + } + + void DestroyKernel() override { + + if (Memory) { + EXPECT_SUCCESS(urUSMFree(Context, Memory)); + } + + ASSERT_NO_FATAL_FAILURE(TestKernel::DestroyKernel()); + } + + void Validate() override { + for (size_t i = 0; i < global_size; i++) { + ASSERT_EQ(static_cast(Memory)[i], Val); + } + } + + size_t local_size = 4; + const size_t size_x = 64; + const size_t size_y = 64; + size_t global_size = size_x * size_y; + size_t global_offset = 0; + const size_t n_dimensions = 2; + + void *Memory; + const uint32_t Val = 42; +}; + +struct 
KernelHandleUpdateTestBase + : uur::command_buffer::urUpdatableCommandBufferExpTest { + virtual void SetUp() override { + + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpTest::SetUp()); + + ur_device_usm_access_capability_flags_t shared_usm_flags; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Shared USM is not supported."; + } + + SaxpyKernel = std::make_shared(TestSaxpyKernel(platform, context, device)); + FillUSM2DKernel = std::make_shared(TestFill2DKernel(platform, context, device)); + TestKernels.push_back(SaxpyKernel); + TestKernels.push_back(FillUSM2DKernel); + + for (auto &TestKernel : TestKernels) { + UUR_RETURN_ON_FATAL_FAILURE(TestKernel->SetUpKernel()); + } + } + + virtual void TearDown() override { + + for (auto &TestKernel : TestKernels) { + UUR_RETURN_ON_FATAL_FAILURE(TestKernel->DestroyKernel()); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpTest::TearDown()); + } + + std::vector> TestKernels{}; + std::shared_ptr SaxpyKernel; + std::shared_ptr FillUSM2DKernel; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(KernelHandleUpdateTestBase); + +TEST_P(KernelHandleUpdateTestBase, KernelHandleUpdateTest) { + + std::vector KernelAlternatives = {FillUSM2DKernel->Kernel}; + + ur_exp_command_buffer_command_handle_t command_handle; + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, + SaxpyKernel->Kernel, + SaxpyKernel->n_dimensions, + &(SaxpyKernel->global_offset), + &(SaxpyKernel->global_size), + &(SaxpyKernel->local_size), + KernelAlternatives.size(), + KernelAlternatives.data(), + 0, + nullptr, + nullptr, + &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + 
ASSERT_SUCCESS(urQueueFinish(queue)); + ASSERT_NO_FATAL_FAILURE(SaxpyKernel->Validate()); + + ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; + + new_input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + &FillUSM2DKernel->Memory, // pArgValue + }; + + uint32_t new_A = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(new_A), // argSize + nullptr, // pProperties + &FillUSM2DKernel->Val, // hArgValue + }; + + // Update kernel inputs + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + FillUSM2DKernel->Kernel, + 0, // numNewMemObjArgs + 1, // numNewPointerArgs + 1, // numNewValueArgs + 2, // newWorkDim + nullptr, // pNewMemObjArgList + new_input_descs, // pNewPointerArgList + &new_A_desc, // pNewValueArgList + &FillUSM2DKernel->global_offset, // pNewGlobalWorkOffset + &FillUSM2DKernel->global_size, // pNewGlobalWorkSize + &FillUSM2DKernel->local_size, // pNewLocalWorkSize + }; + + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ASSERT_NO_FATAL_FAILURE(FillUSM2DKernel->Validate()); +} diff --git a/test/conformance/exp_command_buffer/ndrange_update.cpp b/test/conformance/exp_command_buffer/update/ndrange_update.cpp similarity index 99% rename from test/conformance/exp_command_buffer/ndrange_update.cpp rename to test/conformance/exp_command_buffer/update/ndrange_update.cpp index c107dfab5c..4c5ff6449a 100644 --- a/test/conformance/exp_command_buffer/ndrange_update.cpp +++ 
b/test/conformance/exp_command_buffer/update/ndrange_update.cpp @@ -3,7 +3,7 @@ // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "fixtures.h" +#include "../fixtures.h" #include // Test that updating a command-buffer with a single kernel command diff --git a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp new file mode 100644 index 0000000000..31b14e9016 --- /dev/null +++ b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp @@ -0,0 +1,357 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "../fixtures.h" +#include <cstring> + +// Test that updating a command-buffer with a single kernel command +// taking USM arguments works correctly. +struct USMFillCommandTest + : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { + void SetUp() override { + program_name = "fill_usm"; + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::SetUp()); + + ur_device_usm_access_capability_flags_t shared_usm_flags; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Shared USM is not supported."; + } + + // Allocate USM pointer to fill + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); + std::memset(shared_ptr, 0, allocation_size); + + // Index 0 is output + ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, shared_ptr)); + // Index 1 is input scalar + ASSERT_SUCCESS( + urKernelSetArgValue(kernel, 1, sizeof(val), nullptr, &val)); + + // Append kernel command to command-buffer and close command-buffer + 
ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, + &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void Validate(uint32_t *pointer, size_t length, uint32_t val) { + for (size_t i = 0; i < length; i++) { + ASSERT_EQ(pointer[i], val); + } + } + + void TearDown() override { + if (shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); + } + + if (new_shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, new_shared_ptr)); + } + + if (command_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::TearDown()); + } + + static constexpr uint32_t val = 42; + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr size_t allocation_size = sizeof(val) * global_size; + void *shared_ptr = nullptr; + void *new_shared_ptr = nullptr; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMFillCommandTest); + +// Test using a different global size to fill and larger USM output buffer +TEST_P(USMFillCommandTest, UpdateParameters) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate((uint32_t *)shared_ptr, global_size, val); + + // Allocate a new USM pointer of larger size if feature is supported. 
+ size_t new_global_size = global_size * 2; + const size_t new_allocation_size = sizeof(val) * new_global_size; + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + new_allocation_size, &new_shared_ptr)); + ASSERT_NE(new_shared_ptr, nullptr); + std::memset(new_shared_ptr, 0, new_allocation_size); + + // Set new USM pointer as kernel output at index 0 + ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + &new_shared_ptr, // pArgValue + }; + + // Set new value to use for fill at kernel index 1 + uint32_t new_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(new_val), // argSize + nullptr, // pProperties + &new_val, // hArgValue + }; + + size_t new_local_size = local_size; + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, //hNewKernel + 0, // numNewMemObjArgs + 1, // numNewPointerArgs + 1, // numNewValueArgs + static_cast<uint32_t>(n_dimensions), // newWorkDim + nullptr, // pNewMemObjArgList + &new_output_desc, // pNewPointerArgList + &new_input_desc, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + &new_global_size, // pNewGlobalWorkSize + &new_local_size, // pNewLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + Validate((uint32_t *)new_shared_ptr, new_global_size, new_val); +} + +// Test updating a command-buffer which hasn't been enqueued yet 
+TEST_P(USMFillCommandTest, UpdateBeforeEnqueue) { + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &new_shared_ptr)); + ASSERT_NE(new_shared_ptr, nullptr); + std::memset(new_shared_ptr, 0, allocation_size); + + // Set new USM pointer as kernel output at index 0 + ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + &new_shared_ptr, // pArgValue + }; + + // Set new value to use for fill at kernel index 1 + uint32_t new_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(new_val), // argSize + nullptr, // pProperties + &new_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, //hNewKernel + 0, // numNewMemObjArgs + 1, // numNewPointerArgs + 1, // numNewValueArgs + 0, // newWorkDim + nullptr, // pNewMemObjArgList + &new_output_desc, // pNewPointerArgList + &new_input_desc, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update kernel and enqueue command-buffer + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + Validate((uint32_t *)new_shared_ptr, global_size, new_val); +} + +// Test updating a command-buffer with multiple USM fill kernel commands +struct USMMultipleFillCommandTest + : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { + void SetUp() override { + program_name = 
"fill_usm"; + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::SetUp()); + + ur_device_usm_access_capability_flags_t shared_usm_flags; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Shared USM is not supported."; + } + + // Create a single USM allocation which will be used by all kernels + // by accessing at pointer offsets + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); + std::memset(shared_ptr, 0, allocation_size); + + // Append multiple kernel commands to command-buffer + for (size_t k = 0; k < num_kernels; k++) { + // Calculate offset into output allocation, and set as + // kernel output. + void *offset_ptr = (uint32_t *)shared_ptr + (k * elements); + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 0, nullptr, offset_ptr)); + + // Each kernel has a unique fill value + uint32_t fill_val = val + k; + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(fill_val), + nullptr, &fill_val)); + + // Append kernel and store returned handle + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &elements, &local_size, 0, nullptr, 0, nullptr, nullptr, + &command_handles[k])); + ASSERT_NE(command_handles[k], nullptr); + } + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void Validate(uint32_t *pointer, size_t length, uint32_t val) { + for (size_t i = 0; i < length; i++) { + ASSERT_EQ(pointer[i], val); + } + } + + void TearDown() override { + if (shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); + } + + if (new_shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, new_shared_ptr)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::TearDown()); + } + + static constexpr uint32_t val = 42; + 
static constexpr size_t local_size = 4; + static constexpr size_t global_size = 64; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr size_t allocation_size = sizeof(val) * global_size; + static constexpr size_t num_kernels = 8; + static constexpr size_t elements = global_size / num_kernels; + + void *shared_ptr = nullptr; + void *new_shared_ptr = nullptr; + std::array<ur_exp_command_buffer_command_handle_t, num_kernels> + command_handles; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMMultipleFillCommandTest); + +// Test updating all the kernel commands in the command-buffer +TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + uint32_t *output = (uint32_t *)shared_ptr; + for (size_t i = 0; i < global_size; i++) { + const uint32_t expected = val + (i / elements); + ASSERT_EQ(expected, output[i]); + } + + // Create a new USM allocation to update kernel outputs to + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &new_shared_ptr)); + ASSERT_NE(new_shared_ptr, nullptr); + std::memset(new_shared_ptr, 0, allocation_size); + + // Update each kernel in the command-buffer. 
+ uint32_t new_val = 33; + for (size_t k = 0; k < num_kernels; k++) { + // Update output pointer to an offset into new USM allocation + void *offset_ptr = (uint32_t *)new_shared_ptr + (k * elements); + ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + &offset_ptr, // pArgValue + }; + + // Update fill value + uint32_t new_fill_val = new_val + k; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(new_fill_val), // argSize + nullptr, // pProperties + &new_fill_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, //hNewKernel + 0, // numNewMemObjArgs + 1, // numNewPointerArgs + 1, // numNewValueArgs + 0, // newWorkDim + nullptr, // pNewMemObjArgList + &new_output_desc, // pNewPointerArgList + &new_input_desc, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handles[k], + &update_desc)); + } + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + uint32_t *updated_output = (uint32_t *)new_shared_ptr; + for (size_t i = 0; i < global_size; i++) { + uint32_t expected = new_val + (i / elements); + ASSERT_EQ(expected, updated_output[i]) << i; + } +} diff --git a/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp new file mode 100644 index 
0000000000..93fc683127 --- /dev/null +++ b/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp @@ -0,0 +1,354 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "../fixtures.h" +#include <cstring> + +// Test that updating a command-buffer with a single kernel command +// taking USM & scalar arguments works correctly. + +struct USMSaxpyKernelTestBase + : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { + virtual void SetUp() override { + program_name = "saxpy_usm"; + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::SetUp()); + + ur_device_usm_access_capability_flags_t shared_usm_flags; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Shared USM is not supported."; + } + + const size_t allocation_size = sizeof(uint32_t) * global_size; + for (auto &shared_ptr : shared_ptrs) { + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); + + std::vector<uint8_t> pattern(allocation_size); + uur::generateMemFillPattern(pattern); + std::memcpy(shared_ptr, pattern.data(), allocation_size); + } + + // Index 0 is output + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 0, nullptr, shared_ptrs[0])); + // Index 1 is A + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(A), nullptr, &A)); + // Index 2 is X + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 2, nullptr, shared_ptrs[1])); + // Index 3 is Y + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 3, nullptr, shared_ptrs[2])); + } + + void Validate(uint32_t *output, uint32_t *X, uint32_t *Y, uint32_t A, + size_t length) { + for (size_t i = 0; i < length; i++) { + uint32_t result = A * X[i] + Y[i]; + 
ASSERT_EQ(result, output[i]); + } + } + + virtual void TearDown() override { + for (auto &shared_ptr : shared_ptrs) { + if (shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); + } + } + + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::TearDown()); + } + + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr uint32_t A = 42; + std::array<void *, 5> shared_ptrs = {nullptr, nullptr, nullptr, nullptr}; +}; + +struct USMSaxpyKernelTest : USMSaxpyKernelTestBase { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(USMSaxpyKernelTestBase::SetUp()); + + // Append kernel command to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, + &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void TearDown() override { + if (command_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); + } + + UUR_RETURN_ON_FATAL_FAILURE(USMSaxpyKernelTestBase::TearDown()); + } + + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMSaxpyKernelTest); + +TEST_P(USMSaxpyKernelTest, UpdateParameters) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + uint32_t *output = (uint32_t *)shared_ptrs[0]; + uint32_t *X = (uint32_t *)shared_ptrs[1]; + uint32_t *Y = (uint32_t *)shared_ptrs[2]; + Validate(output, X, Y, A, global_size); + + // Update inputs + ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; + + // New X at index 2 + 
new_input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + nullptr, // pProperties + &shared_ptrs[3], // pArgValue + }; + + // New Y at index 3 + new_input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 3, // argIndex + nullptr, // pProperties + &shared_ptrs[4], // pArgValue + }; + + // New A at index 1 + uint32_t new_A = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(new_A), // argSize + nullptr, // pProperties + &new_A, // hArgValue + }; + + // Update kernel inputs + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, //hNewKernel + 0, // numNewMemObjArgs + 2, // numNewPointerArgs + 1, // numNewValueArgs + 0, // newWorkDim + nullptr, // pNewMemObjArgList + new_input_descs, // pNewPointerArgList + &new_A_desc, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + uint32_t *new_output = (uint32_t *)shared_ptrs[0]; + uint32_t *new_X = (uint32_t *)shared_ptrs[3]; + uint32_t *new_Y = (uint32_t *)shared_ptrs[4]; + Validate(new_output, new_X, new_Y, new_A, global_size); +} + +struct USMMultiSaxpyKernelTest : USMSaxpyKernelTestBase { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(USMSaxpyKernelTestBase::SetUp()); + + // Append kernel command to command-buffer and close 
command-buffer + for (unsigned node = 0; node < nodes; node++) { + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, + &command_handles[node])); + ASSERT_NE(command_handles[node], nullptr); + } + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void TearDown() override { + for (auto &handle : command_handles) { + if (handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(handle)); + } + } + UUR_RETURN_ON_FATAL_FAILURE(USMSaxpyKernelTestBase::TearDown()); + } + + static constexpr size_t nodes = 1024; + static constexpr uint32_t A = 42; + std::array<ur_exp_command_buffer_command_handle_t, nodes> command_handles{}; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMMultiSaxpyKernelTest); + +TEST_P(USMMultiSaxpyKernelTest, UpdateParameters) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + uint32_t *output = (uint32_t *)shared_ptrs[0]; + uint32_t *X = (uint32_t *)shared_ptrs[1]; + uint32_t *Y = (uint32_t *)shared_ptrs[2]; + Validate(output, X, Y, A, global_size); + + // Update inputs + ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; + + // New X at index 2 + new_input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + nullptr, // pProperties + &shared_ptrs[3], // pArgValue + }; + + // New Y at index 3 + new_input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 3, // argIndex + nullptr, // pProperties + &shared_ptrs[4], // pArgValue + }; + + // New A at index 1 + uint32_t new_A = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + 
sizeof(new_A), // argSize + nullptr, // pProperties + &new_A, // hArgValue + }; + + // Update kernel inputs + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, //hNewKernel + 0, // numNewMemObjArgs + 2, // numNewPointerArgs + 1, // numNewValueArgs + 0, // newWorkDim + nullptr, // pNewMemObjArgList + new_input_descs, // pNewPointerArgList + &new_A_desc, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + for (auto &handle : command_handles) { + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(handle, &update_desc)); + } + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + uint32_t *new_output = (uint32_t *)shared_ptrs[0]; + uint32_t *new_X = (uint32_t *)shared_ptrs[3]; + uint32_t *new_Y = (uint32_t *)shared_ptrs[4]; + Validate(new_output, new_X, new_Y, new_A, global_size); +} + +TEST_P(USMMultiSaxpyKernelTest, UpdateWithoutBlocking) { + // Prepare new inputs + ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; + + // New X at index 2 + new_input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + nullptr, // pProperties + &shared_ptrs[3], // pArgValue + }; + + // New Y at index 3 + new_input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 3, // argIndex + nullptr, // pProperties + &shared_ptrs[4], // pArgValue + }; + + // New A at index 1 + uint32_t new_A = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + 
sizeof(new_A), // argSize + nullptr, // pProperties + &new_A, // hArgValue + }; + + // Update kernel inputs + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, //hNewKernel + 0, // numNewMemObjArgs + 2, // numNewPointerArgs + 1, // numNewValueArgs + 0, // newWorkDim + nullptr, // pNewMemObjArgList + new_input_descs, // pNewPointerArgList + &new_A_desc, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Run command-buffer prior to update without doing a blocking wait after + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Update kernel and enqueue command-buffer again + for (auto &handle : command_handles) { + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(handle, &update_desc)); + } + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + uint32_t *new_output = (uint32_t *)shared_ptrs[0]; + uint32_t *new_X = (uint32_t *)shared_ptrs[3]; + uint32_t *new_Y = (uint32_t *)shared_ptrs[4]; + Validate(new_output, new_X, new_Y, new_A, global_size); +} diff --git a/test/conformance/exp_enqueue_native/CMakeLists.txt b/test/conformance/exp_enqueue_native/CMakeLists.txt index 704c84470e..64f885fb94 100644 --- a/test/conformance/exp_enqueue_native/CMakeLists.txt +++ b/test/conformance/exp_enqueue_native/CMakeLists.txt @@ -5,31 +5,14 @@ if (UR_BUILD_ADAPTER_CUDA) add_conformance_test_with_kernels_environment( - exp_enqueue_native_cuda - enqueue_native_cuda.cpp + exp_enqueue_native + enqueue_native_cuda.cpp ) - target_include_directories(test-exp_enqueue_native_cuda PRIVATE - ${PROJECT_SOURCE_DIR}/source - ${PROJECT_SOURCE_DIR}/source/adapters/cuda - ) - 
target_link_libraries(test-exp_enqueue_native_cuda PRIVATE cudadrv) -endif() - -if (UR_BUILD_ADAPTER_L0) - add_conformance_test_with_kernels_environment( - exp_enqueue_native_l0 - enqueue_native_level_zero.cpp - ) - target_link_libraries(test-exp_enqueue_native_l0 PRIVATE - LevelZeroLoader - LevelZeroLoader-Headers - ) - - target_include_directories(test-exp_enqueue_native_l0 PRIVATE - ${PROJECT_SOURCE_DIR}/source - ${PROJECT_SOURCE_DIR}/source/adapters/level_zero - LevelZeroLoader-Headers + target_include_directories(test-exp_enqueue_native PRIVATE + ${PROJECT_SOURCE_DIR}/source + ${PROJECT_SOURCE_DIR}/source/adapters/cuda ) + target_link_libraries(test-exp_enqueue_native PRIVATE cudadrv) endif() # TODO: Add more tests for different triples From c4075b60f89942c82a81bcab4dffd3633134dd73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Wed, 4 Sep 2024 15:14:01 +0100 Subject: [PATCH 08/14] Add binary update implementation for Cuda --- include/ur_api.h | 9 +- scripts/core/exp-command-buffer.yml | 10 +- source/adapters/cuda/command_buffer.cpp | 59 +- source/adapters/cuda/command_buffer.hpp | 11 +- source/adapters/mock/ur_mockddi.cpp | 5 +- source/loader/layers/tracing/ur_trcddi.cpp | 5 +- source/loader/layers/validation/ur_valddi.cpp | 17 +- source/loader/ur_ldrddi.cpp | 5 +- source/loader/ur_libapi.cpp | 9 +- source/ur_api.cpp | 9 +- test/conformance/device_code/fill_usm_2d.cpp | 33 +- .../exp_command_buffer/commands.cpp | 272 ++++----- .../exp_command_buffer_adapter_cuda.match | 1 - .../exp_command_buffer_adapter_hip.match | 1 - ...command_buffer_adapter_level_zero_v2.match | 5 + ...xp_command_buffer_adapter_native_cpu.match | 30 +- .../conformance/exp_command_buffer/fixtures.h | 277 +++++----- .../update/kernel_handle_update.cpp | 518 ++++++++++-------- test/conformance/testing/include/uur/raii.h | 6 + 19 files changed, 714 insertions(+), 568 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 69320f04eb..20d56c4f9c 100644 --- 
a/include/ur_api.h +++ b/include/ur_api.h @@ -8390,6 +8390,9 @@ urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE +/// + `phKernelAlternatives == NULL && numKernelAlternatives > 0` +/// + `phKernelAlternatives != NULL && numKernelAlternatives == 0` +/// + `phKernelAlternatives` contains `hKernel` /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` @@ -8405,10 +8408,11 @@ urCommandBufferAppendKernelLaunchExp( const size_t *pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t *pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in - ///< pKernelAlternatives. + ///< phKernelAlternatives. ur_kernel_handle_t *phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. + ///< command after the command-buffer is finalized. It's invalid to specify + ///< the default kernel `hKernel` as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. 
@@ -8936,6 +8940,7 @@ urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE +/// + If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of ::urCommandBufferAppendKernelLaunchExp when this command was created. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 2eabb5e782..5ebb3ddeb9 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -319,7 +319,7 @@ params: name: "phKernelAlternatives" desc: | [in][optional][range(0, numKernelAlternatives)] List of kernels handles that might be used to update the kernel in this - command after the command-buffer is finalized. + command after the command-buffer is finalized. It's invalid to specify the default kernel `hKernel` as part of this list. - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." 
@@ -338,7 +338,10 @@ returns: - $X_RESULT_ERROR_INVALID_KERNEL - $X_RESULT_ERROR_INVALID_WORK_DIMENSION - $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE - - $X_RESULT_ERROR_INVALID_VALUE + - $X_RESULT_ERROR_INVALID_VALUE: + - "`phKernelAlternatives == NULL && numKernelAlternatives > 0`" + - "`phKernelAlternatives != NULL && numKernelAlternatives == 0`" + - "`phKernelAlternatives` contains `hKernel`" - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" @@ -939,7 +942,8 @@ returns: - $X_RESULT_ERROR_INVALID_ENUMERATION - $X_RESULT_ERROR_INVALID_WORK_DIMENSION - $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE - - $X_RESULT_ERROR_INVALID_VALUE + - $X_RESULT_ERROR_INVALID_VALUE: + - "If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of $xCommandBufferAppendKernelLaunchExp when this command was created." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index ca1737c936..90d2e17862 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -76,9 +76,11 @@ ur_exp_command_buffer_command_handle_t_:: ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, CUgraphNode Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, - const size_t *LocalWorkSizePtr) - : CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), Params(Params), - WorkDim(WorkDim), RefCountInternal(1), RefCountExternal(1) { + const size_t *LocalWorkSizePtr, uint32_t NumKernelAlternatives, + ur_kernel_handle_t *KernelAlternatives) + : CommandBuffer(CommandBuffer), Kernel(Kernel), ValidKernelHandles(), + Node(Node), 
Params(Params), WorkDim(WorkDim), RefCountInternal(1), + RefCountExternal(1) { CommandBuffer->incrementInternalReferenceCount(); const size_t CopySize = sizeof(size_t) * WorkDim; @@ -96,6 +98,13 @@ ur_exp_command_buffer_command_handle_t_:: std::memset(GlobalWorkOffset + WorkDim, 0, ZeroSize); std::memset(GlobalWorkSize + WorkDim, 0, ZeroSize); } + + /* Add the default Kernel as a valid kernel handle for this command */ + ValidKernelHandles.insert(Kernel); + if (KernelAlternatives) { + ValidKernelHandles.insert(KernelAlternatives, + KernelAlternatives + NumKernelAlternatives); + } } /// Helper function for finding the Cuda Nodes associated with the @@ -344,8 +353,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, - uint32_t /*numKernelAlternatives*/, - ur_kernel_handle_t * /*phKernelAlternatives*/, + uint32_t numKernelAlternatives, ur_kernel_handle_t *phKernelAlternatives, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint, @@ -356,6 +364,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + for (uint32_t i = 0; i < numKernelAlternatives; ++i) { + UR_ASSERT(phKernelAlternatives && phKernelAlternatives[i] != hKernel, + UR_RESULT_ERROR_INVALID_VALUE); + } + CUgraphNode GraphNode; std::vector<CUgraphNode> DepsList; @@ -420,8 +433,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } auto NewCommand = new ur_exp_command_buffer_command_handle_t_{ - hCommandBuffer, hKernel, GraphNode, NodeParams, - workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize}; + hCommandBuffer, hKernel, GraphNode, + NodeParams, workDim, 
pGlobalWorkOffset, + pGlobalWorkSize, pLocalWorkSize, numKernelAlternatives, + phKernelAlternatives}; NewCommand->incrementInternalReferenceCount(); hCommandBuffer->CommandHandles.push_back(NewCommand); @@ -865,10 +880,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( } if (auto NewWorkDim = pUpdateKernelLaunch->newWorkDim) { - // Error if work dim changes - if (NewWorkDim != hCommand->WorkDim) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } // Error If Local size and not global size if ((pUpdateKernelLaunch->pNewLocalWorkSize != nullptr) && @@ -888,7 +899,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( } // Kernel corresponding to the command to update - ur_kernel_handle_t Kernel = hCommand->Kernel; + ur_kernel_handle_t NewKernel = pUpdateKernelLaunch->hNewKernel; + + if (hCommand->ValidKernelHandles.count(NewKernel)) { + hCommand->Kernel = NewKernel; + } else { + return UR_RESULT_ERROR_INVALID_VALUE; + } // Update pointer arguments to the kernel uint32_t NumPointerArgs = pUpdateKernelLaunch->numNewPointerArgs; @@ -901,7 +918,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( ur_result_t Result = UR_RESULT_SUCCESS; try { - Kernel->setKernelArg(ArgIndex, sizeof(ArgValue), ArgValue); + NewKernel->setKernelArg(ArgIndex, sizeof(ArgValue), ArgValue); } catch (ur_result_t Err) { Result = Err; return Result; @@ -920,11 +937,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( ur_result_t Result = UR_RESULT_SUCCESS; try { if (ArgValue == nullptr) { - Kernel->setKernelArg(ArgIndex, 0, nullptr); + NewKernel->setKernelArg(ArgIndex, 0, nullptr); } else { CUdeviceptr CuPtr = std::get(ArgValue->Mem).getPtr(CommandBuffer->Device); - Kernel->setKernelArg(ArgIndex, sizeof(CUdeviceptr), (void *)&CuPtr); + NewKernel->setKernelArg(ArgIndex, sizeof(CUdeviceptr), (void *)&CuPtr); } } catch (ur_result_t Err) { Result = Err; @@ -945,7 +962,7 @@ UR_APIEXPORT ur_result_t 
UR_APICALL urCommandBufferUpdateKernelLaunchExp( ur_result_t Result = UR_RESULT_SUCCESS; try { - Kernel->setKernelArg(ArgIndex, ArgSize, ArgValue); + NewKernel->setKernelArg(ArgIndex, ArgSize, ArgValue); } catch (ur_result_t Err) { Result = Err; return Result; @@ -985,12 +1002,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( // by default unless user has provided a better number size_t ThreadsPerBlock[3] = {32u, 1u, 1u}; size_t BlocksPerGrid[3] = {1u, 1u, 1u}; - CUfunction CuFunc = Kernel->get(); + CUfunction CuFunc = NewKernel->get(); ur_context_handle_t Context = CommandBuffer->Context; ur_device_handle_t Device = CommandBuffer->Device; auto Result = setKernelParams(Context, Device, WorkDim, GlobalWorkOffset, - GlobalWorkSize, LocalWorkSize, Kernel, CuFunc, - ThreadsPerBlock, BlocksPerGrid); + GlobalWorkSize, LocalWorkSize, NewKernel, + CuFunc, ThreadsPerBlock, BlocksPerGrid); if (Result != UR_RESULT_SUCCESS) { return Result; } @@ -1004,8 +1021,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( Params.blockDimX = ThreadsPerBlock[0]; Params.blockDimY = ThreadsPerBlock[1]; Params.blockDimZ = ThreadsPerBlock[2]; - Params.sharedMemBytes = Kernel->getLocalSize(); - Params.kernelParams = const_cast(Kernel->getArgIndices().data()); + Params.sharedMemBytes = NewKernel->getLocalSize(); + Params.kernelParams = const_cast(NewKernel->getArgIndices().data()); CUgraphNode Node = hCommand->Node; CUgraphExec CudaGraphExec = CommandBuffer->CudaGraphExec; diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index 504095612b..49e3ba8b25 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -16,6 +16,7 @@ #include "logger/ur_logger.hpp" #include #include +#include // Trace an internal UR call #define UR_TRACE(Call) \ @@ -44,7 +45,8 @@ struct ur_exp_command_buffer_command_handle_t_ { ur_exp_command_buffer_handle_t CommandBuffer, 
ur_kernel_handle_t Kernel, CUgraphNode Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, - const size_t *LocalWorkSizePtr); + const size_t *LocalWorkSizePtr, uint32_t NumKernelAlternatives, + ur_kernel_handle_t *KernelAlternatives); void setGlobalOffset(const size_t *GlobalWorkOffsetPtr) { const size_t CopySize = sizeof(size_t) * WorkDim; @@ -96,7 +98,14 @@ struct ur_exp_command_buffer_command_handle_t_ { } ur_exp_command_buffer_handle_t CommandBuffer; + + /* The currently active kernel handle for this command */ ur_kernel_handle_t Kernel; + + /* Set of all the kernel handles that can be used when updating this command + */ + std::unordered_set ValidKernelHandles; + CUgraphNode Node; CUDA_KERNEL_NODE_PARAMS Params; diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index 594a173ff1..876e895322 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -8352,11 +8352,12 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in - ///< pKernelAlternatives. + ///< phKernelAlternatives. ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. + ///< command after the command-buffer is finalized. It's invalid to specify + ///< the default kernel `hKernel` as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 4be0fba5b0..f21320b830 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -6496,11 +6496,12 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in - ///< pKernelAlternatives. + ///< phKernelAlternatives. ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. + ///< command after the command-buffer is finalized. It's invalid to specify + ///< the default kernel `hKernel` as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index eb13922c9f..1c6dbb1392 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8057,11 +8057,12 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in - ///< pKernelAlternatives. + ///< phKernelAlternatives. ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. + ///< command after the command-buffer is finalized. 
It's invalid to specify + ///< the default kernel `hKernel` as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * @@ -8096,6 +8097,21 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + if (phKernelAlternatives == NULL && numKernelAlternatives > 0) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + + if (phKernelAlternatives != NULL && numKernelAlternatives == 0) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + + // The default kernel `hKernel` must not also be listed as an alternative. + for (uint32_t i = 0; i < numKernelAlternatives; ++i) { + if (phKernelAlternatives[i] == hKernel) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + } + if (pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; } diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 5d4d2ee380..20a5e8acfa 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -7108,11 +7108,12 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in - ///< pKernelAlternatives. + ///< phKernelAlternatives. ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. + ///< command after the command-buffer is finalized. It's invalid to specify + ///< the default kernel `hKernel` as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list.
const ur_exp_command_buffer_sync_point_t * diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index a981d4f032..89e4a3788c 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7525,6 +7525,9 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE +/// + `phKernelAlternatives == NULL && numKernelAlternatives > 0` +/// + `phKernelAlternatives != NULL && numKernelAlternatives == 0` +/// + `phKernelAlternatives` contains `hKernel` /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` @@ -7544,11 +7547,12 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in - ///< pKernelAlternatives. + ///< phKernelAlternatives. ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. + ///< command after the command-buffer is finalized. It's invalid to specify + ///< the default kernel `hKernel` as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * @@ -8321,6 +8325,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE +/// + If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of ::urCommandBufferAppendKernelLaunchExp when this command was created. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 7fbe274644..967121dd31 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -6381,6 +6381,9 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE +/// + `phKernelAlternatives == NULL && numKernelAlternatives > 0` +/// + `phKernelAlternatives != NULL && numKernelAlternatives == 0` +/// + `phKernelAlternatives` contains `hKernel` /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` @@ -6400,11 +6403,12 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in - ///< pKernelAlternatives. + ///< phKernelAlternatives. ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. + ///< command after the command-buffer is finalized. It's invalid to specify + ///< the default kernel `hKernel` as part of this list. 
uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * @@ -7034,6 +7038,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE +/// + If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of ::urCommandBufferAppendKernelLaunchExp when this command was created. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( diff --git a/test/conformance/device_code/fill_usm_2d.cpp b/test/conformance/device_code/fill_usm_2d.cpp index 76fc0ae7ea..2cfba67884 100644 --- a/test/conformance/device_code/fill_usm_2d.cpp +++ b/test/conformance/device_code/fill_usm_2d.cpp @@ -7,24 +7,25 @@ int main() { - size_t nd_range_x = 8; - size_t nd_range_y = 8; + size_t nd_range_x = 8; + size_t nd_range_y = 8; - auto nd_range = sycl::range<2>(nd_range_x, nd_range_y); + auto nd_range = sycl::range<2>(nd_range_x, nd_range_y); - std::vector A(nd_range_x * nd_range_y, 1); - uint32_t val = 42; - sycl::queue sycl_queue; + std::vector A(nd_range_x * nd_range_y, 1); + uint32_t val = 42; + sycl::queue sycl_queue; - auto work_range = sycl::nd_range<2>(nd_range, sycl::range<2>(1, 1)); + auto work_range = sycl::nd_range<2>(nd_range, sycl::range<2>(1, 1)); - uint32_t *data = sycl::malloc_shared(nd_range_x * nd_range_y, sycl_queue); - sycl_queue.submit([&](sycl::handler &cgh) { - cgh.parallel_for( - work_range, [data, val](sycl::nd_item<2> item_id) { - auto id = item_id.get_global_linear_id(); - data[id] = val; - }); - }); - return 0; + uint32_t *data = + sycl::malloc_shared(nd_range_x * nd_range_y, sycl_queue); + sycl_queue.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + work_range, [data, val](sycl::nd_item<2> item_id) { + auto id = 
item_id.get_global_linear_id(); + data[id] = val; + }); + }); + return 0; } diff --git a/test/conformance/exp_command_buffer/commands.cpp b/test/conformance/exp_command_buffer/commands.cpp index 4066a1d3a2..c2384d982b 100644 --- a/test/conformance/exp_command_buffer/commands.cpp +++ b/test/conformance/exp_command_buffer/commands.cpp @@ -8,197 +8,197 @@ struct urCommandBufferCommandsTest : uur::command_buffer::urCommandBufferExpTest { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE( - uur::command_buffer::urCommandBufferExpTest::SetUp()); - - // Allocate USM pointers - for (auto &device_ptr : device_ptrs) { - ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr, - allocation_size, &device_ptr)); - ASSERT_NE(device_ptr, nullptr); + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE( + uur::command_buffer::urCommandBufferExpTest::SetUp()); + + // Allocate USM pointers + for (auto &device_ptr : device_ptrs) { + ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr, + allocation_size, &device_ptr)); + ASSERT_NE(device_ptr, nullptr); + } + + for (auto &buffer : buffers) { + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + allocation_size, nullptr, + &buffer)); + + ASSERT_NE(buffer, nullptr); + } } - for (auto &buffer : buffers) { - ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, - allocation_size, nullptr, - &buffer)); - - ASSERT_NE(buffer, nullptr); - } - } - - void TearDown() override { - for (auto &device_ptr : device_ptrs) { - if (device_ptr) { - EXPECT_SUCCESS(urUSMFree(context, device_ptr)); - } + void TearDown() override { + for (auto &device_ptr : device_ptrs) { + if (device_ptr) { + EXPECT_SUCCESS(urUSMFree(context, device_ptr)); + } + } + + for (auto &buffer : buffers) { + if (buffer) { + EXPECT_SUCCESS(urMemRelease(buffer)); + } + } + + UUR_RETURN_ON_FATAL_FAILURE( + uur::command_buffer::urCommandBufferExpTest::TearDown()); } - for (auto &buffer : buffers) { - if (buffer) { - 
EXPECT_SUCCESS(urMemRelease(buffer)); - } - } + static constexpr unsigned elements = 16; + static constexpr size_t allocation_size = elements * sizeof(uint32_t); - UUR_RETURN_ON_FATAL_FAILURE( - uur::command_buffer::urCommandBufferExpTest::TearDown()); - } - - static constexpr unsigned elements = 16; - static constexpr size_t allocation_size = elements * sizeof(uint32_t); - - std::array device_ptrs = {nullptr, nullptr}; - std::array buffers = {nullptr, nullptr}; + std::array device_ptrs = {nullptr, nullptr}; + std::array buffers = {nullptr, nullptr}; }; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferCommandsTest); TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendUSMMemcpyExp) { - ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( - cmd_buf_handle, device_ptrs[0], device_ptrs[1], allocation_size, 0, - nullptr, nullptr)); + ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( + cmd_buf_handle, device_ptrs[0], device_ptrs[1], allocation_size, 0, + nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendUSMFillExp) { - uint32_t pattern = 42; - ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( - cmd_buf_handle, device_ptrs[0], &pattern, sizeof(pattern), - allocation_size, 0, nullptr, nullptr)); + uint32_t pattern = 42; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + cmd_buf_handle, device_ptrs[0], &pattern, sizeof(pattern), + allocation_size, 0, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferCopyExp) { - ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyExp( - cmd_buf_handle, buffers[0], buffers[1], 0, 0, allocation_size, 0, - nullptr, nullptr)); + ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyExp( + cmd_buf_handle, buffers[0], buffers[1], 0, 0, allocation_size, 0, + nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferCopyRectExp) { - ur_rect_offset_t origin{0, 0, 0}; - ur_rect_region_t region{4, 4, 1}; - 
ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyRectExp( - cmd_buf_handle, buffers[0], buffers[1], origin, origin, region, 4, 16, - 4, 16, 0, nullptr, nullptr)); + ur_rect_offset_t origin{0, 0, 0}; + ur_rect_region_t region{4, 4, 1}; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyRectExp( + cmd_buf_handle, buffers[0], buffers[1], origin, origin, region, 4, 16, + 4, 16, 0, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferReadExp) { - std::array host_data{}; - ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadExp( - cmd_buf_handle, buffers[0], 0, allocation_size, host_data.data(), 0, - nullptr, nullptr)); + std::array host_data{}; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadExp( + cmd_buf_handle, buffers[0], 0, allocation_size, host_data.data(), 0, + nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferReadRectExp) { - std::array host_data{}; - ur_rect_offset_t origin{0, 0, 0}; - ur_rect_region_t region{4, 4, 1}; - ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadRectExp( - cmd_buf_handle, buffers[0], origin, origin, region, 4, 16, 4, 16, - host_data.data(), 0, nullptr, nullptr)); + std::array host_data{}; + ur_rect_offset_t origin{0, 0, 0}; + ur_rect_region_t region{4, 4, 1}; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadRectExp( + cmd_buf_handle, buffers[0], origin, origin, region, 4, 16, 4, 16, + host_data.data(), 0, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferWriteExp) { - std::array host_data{}; - ASSERT_SUCCESS(urCommandBufferAppendMemBufferWriteExp( - cmd_buf_handle, buffers[0], 0, allocation_size, host_data.data(), 0, - nullptr, nullptr)); + std::array host_data{}; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferWriteExp( + cmd_buf_handle, buffers[0], 0, allocation_size, host_data.data(), 0, + nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferWriteRectExp) { - std::array host_data{}; - 
ur_rect_offset_t origin{0, 0, 0}; - ur_rect_region_t region{4, 4, 1}; - ASSERT_SUCCESS(urCommandBufferAppendMemBufferWriteRectExp( - cmd_buf_handle, buffers[0], origin, origin, region, 4, 16, 4, 16, - host_data.data(), 0, nullptr, nullptr)); + std::array host_data{}; + ur_rect_offset_t origin{0, 0, 0}; + ur_rect_region_t region{4, 4, 1}; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferWriteRectExp( + cmd_buf_handle, buffers[0], origin, origin, region, 4, 16, 4, 16, + host_data.data(), 0, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferFillExp) { - uint32_t pattern = 42; - ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp( - cmd_buf_handle, buffers[0], &pattern, sizeof(pattern), 0, - allocation_size, 0, nullptr, nullptr)); + uint32_t pattern = 42; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp( + cmd_buf_handle, buffers[0], &pattern, sizeof(pattern), 0, + allocation_size, 0, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendUSMPrefetchExp) { - ASSERT_SUCCESS(urCommandBufferAppendUSMPrefetchExp( - cmd_buf_handle, device_ptrs[0], allocation_size, 0, 0, nullptr, - nullptr)); + ASSERT_SUCCESS(urCommandBufferAppendUSMPrefetchExp( + cmd_buf_handle, device_ptrs[0], allocation_size, 0, 0, nullptr, + nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendUSMAdviseExp) { - ASSERT_SUCCESS(urCommandBufferAppendUSMAdviseExp( - cmd_buf_handle, device_ptrs[0], allocation_size, 0, 0, nullptr, - nullptr)); + ASSERT_SUCCESS(urCommandBufferAppendUSMAdviseExp( + cmd_buf_handle, device_ptrs[0], allocation_size, 0, 0, nullptr, + nullptr)); } struct urCommandBufferAppendKernelLaunchExpTest : uur::command_buffer::urCommandBufferExpExecutionTest { - virtual void SetUp() override { - program_name = "saxpy_usm"; - UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest::SetUp()); - for (auto &shared_ptr : shared_ptrs) { - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - 
allocation_size, &shared_ptr)); - ASSERT_NE(shared_ptr, nullptr); + virtual void SetUp() override { + program_name = "saxpy_usm"; + UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest::SetUp()); + for (auto &shared_ptr : shared_ptrs) { + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); + } + + int32_t *ptrX = static_cast(shared_ptrs[1]); + int32_t *ptrY = static_cast(shared_ptrs[2]); + for (size_t i = 0; i < global_size; i++) { + ptrX[i] = i; + ptrY[i] = i * 2; + } + + // Index 0 is output + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 0, nullptr, shared_ptrs[0])); + // Index 1 is A + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(A), nullptr, &A)); + // Index 2 is X + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 2, nullptr, shared_ptrs[1])); + // Index 3 is Y + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 3, nullptr, shared_ptrs[2])); } - int32_t *ptrX = static_cast(shared_ptrs[1]); - int32_t *ptrY = static_cast(shared_ptrs[2]); - for (size_t i = 0; i < global_size; i++) { - ptrX[i] = i; - ptrY[i] = i * 2; - } + virtual void TearDown() override { + for (auto &shared_ptr : shared_ptrs) { + if (shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); + } + } - // Index 0 is output - ASSERT_SUCCESS( - urKernelSetArgPointer(kernel, 0, nullptr, shared_ptrs[0])); - // Index 1 is A - ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(A), nullptr, &A)); - // Index 2 is X - ASSERT_SUCCESS( - urKernelSetArgPointer(kernel, 2, nullptr, shared_ptrs[1])); - // Index 3 is Y - ASSERT_SUCCESS( - urKernelSetArgPointer(kernel, 3, nullptr, shared_ptrs[2])); - } - - virtual void TearDown() override { - for (auto &shared_ptr : shared_ptrs) { - if (shared_ptr) { - EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); - } + UUR_RETURN_ON_FATAL_FAILURE( + urCommandBufferExpExecutionTest::TearDown()); } - UUR_RETURN_ON_FATAL_FAILURE( - 
urCommandBufferExpExecutionTest::TearDown()); - } - - static constexpr size_t local_size = 4; - static constexpr size_t global_size = 32; - static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; - static constexpr size_t allocation_size = sizeof(uint32_t) * global_size; - static constexpr uint32_t A = 42; - std::array shared_ptrs = {nullptr, nullptr, nullptr}; + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr size_t allocation_size = sizeof(uint32_t) * global_size; + static constexpr uint32_t A = 42; + std::array shared_ptrs = {nullptr, nullptr, nullptr}; }; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferAppendKernelLaunchExpTest); TEST_P(urCommandBufferAppendKernelLaunchExpTest, Basic) { - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - cmd_buf_handle, kernel, n_dimensions, &global_offset, &global_size, - &local_size, 0, nullptr, 0, nullptr, nullptr, nullptr)); + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + cmd_buf_handle, kernel, n_dimensions, &global_offset, &global_size, + &local_size, 0, nullptr, 0, nullptr, nullptr, nullptr)); - ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); - ASSERT_SUCCESS( - urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); - int32_t *ptrZ = static_cast(shared_ptrs[0]); - for (size_t i = 0; i < global_size; i++) { - uint32_t result = (A * i) + (i * 2); - ASSERT_EQ(result, ptrZ[i]); - } + int32_t *ptrZ = static_cast(shared_ptrs[0]); + for (size_t i = 0; i < global_size; i++) { + uint32_t result = (A * i) + (i * 2); + ASSERT_EQ(result, ptrZ[i]); + } } diff --git 
a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match index 8b13789179..e69de29bb2 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match @@ -1 +0,0 @@ - diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match index 8b13789179..e69de29bb2 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match @@ -1 +0,0 @@ - diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match index 7c222d70a6..f997810ca5 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match @@ -55,3 +55,8 @@ urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Z urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__8 urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__16 urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__32 +urCommandBufferKernelHandleUpdateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urCommandBufferKernelHandleUpdateTest.UpdateAgain/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urCommandBufferKernelHandleUpdateTest.KernelAlternativeNotRegistered/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
+urCommandBufferKernelHandleUpdateTest.RegisterInvalidKernelAlternative/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ + diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match index 2508f92fed..096a052315 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match @@ -1,28 +1,28 @@ +{{OPT}}urCommandBufferReleaseCommandExpTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urCommandBufferReleaseCommandExpTest.ReleaseCmdBufBeforeHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urCommandBufferReleaseCommandExpTest.ReleaseCmdBufMultipleHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urCommandBufferReleaseCommandExpTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urCommandBufferRetainCommandExpTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urCommandBufferRetainCommandExpTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urCommandBufferAppendKernelLaunchExpTest.Basic/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}BufferFillCommandTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}BufferFillCommandTest.UpdateGlobalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}BufferFillCommandTest.SeparateUpdateCalls/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}BufferFillCommandTest.OverrideUpdate/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}BufferFillCommandTest.OverrideArgList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}InvalidUpdateTest.NotFinalizedCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}InvalidUpdateTest.NotUpdatableCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}InvalidUpdateTest.GlobalLocalSizeMistach/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
+{{OPT}}InvalidUpdateTest.ImplToUserDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}InvalidUpdateTest.UserToImplDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMFillCommandTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMFillCommandTest.UpdateBeforeEnqueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMMultipleFillCommandTest.UpdateAllKernels/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}BufferSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMMultiSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMMultiSaxpyKernelTest.UpdateWithoutBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}NDRangeUpdateTest.Update3D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}NDRangeUpdateTest.Update2D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}NDRangeUpdateTest.Update1D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}NDRangeUpdateTest.Invalid/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferReleaseCommandExpTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferReleaseCommandExpTest.ReleaseCmdBufBeforeHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferReleaseCommandExpTest.ReleaseCmdBufMultipleHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferReleaseCommandExpTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferRetainCommandExpTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferRetainCommandExpTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}InvalidUpdateTest.NotFinalizedCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}InvalidUpdateTest.NotUpdatableCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}InvalidUpdateTest.GlobalLocalSizeMistach/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}InvalidUpdateTest.ImplToUserDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}InvalidUpdateTest.UserToImplDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferAppendKernelLaunchExpTest.Basic/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}USMSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}USMMultiSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}USMMultiSaxpyKernelTest.UpdateWithoutBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} diff --git a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h index 2d2d343d31..f81b664d7f 100644 --- a/test/conformance/exp_command_buffer/fixtures.h +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -12,129 +12,148 @@ namespace uur { namespace command_buffer { static void checkCommandBufferSupport(ur_device_handle_t device) { - size_t returned_size; - ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0, - nullptr, &returned_size)); + size_t returned_size; + ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0, + nullptr, &returned_size)); - std::unique_ptr returned_extensions(new char[returned_size]); + std::unique_ptr returned_extensions(new char[returned_size]); - ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, - returned_size, returned_extensions.get(), - nullptr)); + ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, + returned_size, returned_extensions.get(), + nullptr)); - std::string_view extensions_string(returned_extensions.get()); - bool command_buffer_support = - extensions_string.find(UR_COMMAND_BUFFER_EXTENSION_STRING_EXP) != - std::string::npos; + std::string_view extensions_string(returned_extensions.get()); + bool command_buffer_support = + 
extensions_string.find(UR_COMMAND_BUFFER_EXTENSION_STRING_EXP) != + std::string::npos; - if (!command_buffer_support) { - GTEST_SKIP() << "EXP command-buffer feature is not supported."; - } + if (!command_buffer_support) { + GTEST_SKIP() << "EXP command-buffer feature is not supported."; + } } static void checkCommandBufferUpdateSupport(ur_device_handle_t device) { - bool updatable_command_buffer_support; - ASSERT_SUCCESS(urDeviceGetInfo( - device, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP, - sizeof(ur_bool_t), &updatable_command_buffer_support, nullptr)); + bool updatable_command_buffer_support; + ASSERT_SUCCESS(urDeviceGetInfo( + device, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP, + sizeof(ur_bool_t), &updatable_command_buffer_support, nullptr)); - if (!updatable_command_buffer_support) { - GTEST_SKIP() << "Updating EXP command-buffers is not supported."; - } + if (!updatable_command_buffer_support) { + GTEST_SKIP() << "Updating EXP command-buffers is not supported."; + } } struct urCommandBufferExpTest : uur::urContextTest { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE(uur::urContextTest::SetUp()); + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urContextTest::SetUp()); - ASSERT_NO_FATAL_FAILURE(checkCommandBufferSupport(device)); - ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, - &cmd_buf_handle)); - ASSERT_NE(cmd_buf_handle, nullptr); - } + UUR_RETURN_ON_FATAL_FAILURE(checkCommandBufferSupport(device)); + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, + &cmd_buf_handle)); + ASSERT_NE(cmd_buf_handle, nullptr); + } - void TearDown() override { - if (cmd_buf_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); + void TearDown() override { + if (cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); + } + UUR_RETURN_ON_FATAL_FAILURE(uur::urContextTest::TearDown()); } - UUR_RETURN_ON_FATAL_FAILURE(uur::urContextTest::TearDown()); - } - 
ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; + ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; }; -struct urUpdatableCommandBufferExpTest : uur::urQueueTest { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::SetUp()); - - ASSERT_NO_FATAL_FAILURE(checkCommandBufferSupport(device)); - - ASSERT_NO_FATAL_FAILURE(checkCommandBufferUpdateSupport(device)); +template +struct urCommandBufferExpTestWithParam : urQueueTestWithParam { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTestWithParam::SetUp()); - // Create a command-buffer with update enabled. - ur_exp_command_buffer_desc_t desc{ - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true}; - - ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc, - &updatable_cmd_buf_handle)); - ASSERT_NE(updatable_cmd_buf_handle, nullptr); - - // Currently there are synchronization issue with immediate submission when used for command buffers. - // So, create queue with batched submission for this test suite if the backend is Level Zero. 
- if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { // TODO Remove this workaround - ur_queue_flags_t flags = UR_QUEUE_FLAG_SUBMISSION_BATCHED; - ur_queue_properties_t props = { - /*.stype =*/UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, - /*.pNext =*/nullptr, - /*.flags =*/flags, - }; - ASSERT_SUCCESS(urQueueCreate(context, device, &props, &queue)); - ASSERT_NE(queue, nullptr); - } else { - queue = urQueueTest::queue; + UUR_RETURN_ON_FATAL_FAILURE(checkCommandBufferSupport(this->device)); + ASSERT_SUCCESS(urCommandBufferCreateExp(this->context, this->device, + nullptr, &cmd_buf_handle)); + ASSERT_NE(cmd_buf_handle, nullptr); } - } - void TearDown() override { - if (updatable_cmd_buf_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle)); + void TearDown() override { + if (cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); + } + UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTestWithParam::TearDown()); } - if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO && queue) { - ASSERT_SUCCESS(urQueueRelease(queue)); + + ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; +}; + +struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp()); + + UUR_RETURN_ON_FATAL_FAILURE(checkCommandBufferSupport(device)); + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, + &cmd_buf_handle)); + ASSERT_NE(cmd_buf_handle, nullptr); } - UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::TearDown()); - } + void TearDown() override { + if (cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); + } + UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::TearDown()); + } - ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr; - ur_queue_handle_t queue = nullptr; - ur_platform_backend_t backend{}; + ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; }; -template -struct urCommandBufferExpTestWithParam : 
urQueueTestWithParam { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTestWithParam::SetUp()); +struct urUpdatableCommandBufferExpTest : uur::urQueueTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::SetUp()); - ASSERT_NO_FATAL_FAILURE(checkCommandBufferSupport(this->device)); - ASSERT_SUCCESS(urCommandBufferCreateExp(this->context, this->device, - nullptr, &cmd_buf_handle)); - ASSERT_NE(cmd_buf_handle, nullptr); - } + UUR_RETURN_ON_FATAL_FAILURE(checkCommandBufferSupport(device)); + UUR_RETURN_ON_FATAL_FAILURE(checkCommandBufferUpdateSupport(device)); - void TearDown() override { - if (cmd_buf_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); + // Create a command-buffer with update enabled. + ur_exp_command_buffer_desc_t desc{ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true}; + + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc, + &updatable_cmd_buf_handle)); + ASSERT_NE(updatable_cmd_buf_handle, nullptr); + + // Currently there are synchronization issue with immediate submission when used for command buffers. + // So, create queue with batched submission for this test suite if the backend is Level Zero. 
+ if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { + ur_queue_flags_t flags = UR_QUEUE_FLAG_SUBMISSION_BATCHED; + ur_queue_properties_t props = { + /*.stype =*/UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, + /*.pNext =*/nullptr, + /*.flags =*/flags, + }; + ASSERT_SUCCESS(urQueueCreate(context, device, &props, &queue)); + ASSERT_NE(queue, nullptr); + } else { + queue = urQueueTest::queue; + } } - UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTestWithParam::TearDown()); - } - ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; + void TearDown() override { + if (updatable_cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle)); + } + if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO && queue) { + ASSERT_SUCCESS(urQueueRelease(queue)); + } + + UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::TearDown()); + } + + ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr; + ur_queue_handle_t queue = nullptr; + ur_platform_backend_t backend{}; }; -struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp()); +struct urUpdatableCommandBufferExpExecutionTest : uur::urKernelExecutionTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp()); ASSERT_NO_FATAL_FAILURE(checkCommandBufferSupport(device)); ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, @@ -161,9 +180,9 @@ struct urUpdatableCommandBufferExpExecutionTest ASSERT_NO_FATAL_FAILURE(checkCommandBufferSupport(device)); ASSERT_NO_FATAL_FAILURE(checkCommandBufferUpdateSupport(device)); - // Create a command-buffer with update enabled. - ur_exp_command_buffer_desc_t desc{ - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true}; + // Create a command-buffer with update enabled. 
+ ur_exp_command_buffer_desc_t desc{ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true}; ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc, &updatable_cmd_buf_handle)); @@ -183,46 +202,46 @@ struct urUpdatableCommandBufferExpExecutionTest struct urCommandBufferCommandExpTest : urUpdatableCommandBufferExpExecutionTest { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpExecutionTest::SetUp()); - - // Append 2 kernel commands to command-buffer and close command-buffer - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, - &command_handle)); - ASSERT_NE(command_handle, nullptr); - - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, - &command_handle_2)); - ASSERT_NE(command_handle_2, nullptr); - - ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); - } - - void TearDown() override { - if (command_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::SetUp()); + + // Append 2 kernel commands to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, + &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, + &command_handle_2)); + ASSERT_NE(command_handle_2, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); } - if (command_handle_2) { - 
EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle_2)); - } + void TearDown() override { + if (command_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); + } - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpExecutionTest::TearDown()); - } + if (command_handle_2) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle_2)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::TearDown()); + } - static constexpr size_t local_size = 4; - static constexpr size_t global_size = 32; - static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; - ur_exp_command_buffer_command_handle_t command_handle = nullptr; - ur_exp_command_buffer_command_handle_t command_handle_2 = nullptr; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; + ur_exp_command_buffer_command_handle_t command_handle_2 = nullptr; }; } // namespace command_buffer } // namespace uur diff --git a/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp b/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp index 560bf23701..4aac942231 100644 --- a/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp +++ b/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp @@ -4,288 +4,344 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "../fixtures.h" +#include "uur/raii.h" #include -//TODO - +// Tests that it is possible to update the kernel handle of a command-buffer node. +// This test launches a Saxpy kernel using a command-buffer and then updates the +// node with a completely different kernel that does a fill 2D operation. 
struct TestKernel { - TestKernel(std::string Name, ur_platform_handle_t Platform, ur_context_handle_t Context, ur_device_handle_t Device) - : Name(std::move(Name)), Platform(Platform), Context(Context), Device(Device) { - - } - - virtual ~TestKernel() = default; - - virtual void BuildKernel() { + TestKernel(std::string Name, ur_platform_handle_t Platform, + ur_context_handle_t Context, ur_device_handle_t Device) + : Name(std::move(Name)), Platform(Platform), Context(Context), + Device(Device) {} - std::shared_ptr> ILBinary; - std::vector Metadatas{}; + virtual ~TestKernel() = default; - ur_platform_backend_t backend; - ASSERT_SUCCESS(urPlatformGetInfo(Platform, UR_PLATFORM_INFO_BACKEND, - sizeof(backend), &backend, nullptr)); + virtual void buildKernel() { + std::shared_ptr> ILBinary; + std::vector Metadatas{}; - ASSERT_NO_FATAL_FAILURE( - uur::KernelsEnvironment::instance->LoadSource(Name, - ILBinary)); + ur_platform_backend_t Backend; + ASSERT_SUCCESS(urPlatformGetInfo(Platform, UR_PLATFORM_INFO_BACKEND, + sizeof(Backend), &Backend, nullptr)); - const ur_program_properties_t properties = { - UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES, nullptr, - static_cast(Metadatas.size()), - Metadatas.empty() ? nullptr : Metadatas.data()}; - ASSERT_SUCCESS(uur::KernelsEnvironment::instance->CreateProgram( - Platform, Context, Device, *ILBinary, &properties, &Program)); + ASSERT_NO_FATAL_FAILURE( + uur::KernelsEnvironment::instance->LoadSource(Name, ILBinary)); - auto KernelNames = - uur::KernelsEnvironment::instance->GetEntryPointNames(Name); - std::string KernelName = KernelNames[0]; - ASSERT_FALSE(KernelName.empty()); + const ur_program_properties_t Properties = { + UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES, nullptr, + static_cast(Metadatas.size()), + Metadatas.empty() ? 
nullptr : Metadatas.data()}; + ASSERT_SUCCESS(uur::KernelsEnvironment::instance->CreateProgram( + Platform, Context, Device, *ILBinary, &Properties, &Program)); - ASSERT_SUCCESS(urProgramBuild(Context, Program, nullptr)); - ASSERT_SUCCESS(urKernelCreate(Program, KernelName.data(), &Kernel)); - } + auto KernelNames = + uur::KernelsEnvironment::instance->GetEntryPointNames(Name); + std::string KernelName = KernelNames[0]; + ASSERT_FALSE(KernelName.empty()); - virtual void SetUpKernel() = 0; + ASSERT_SUCCESS(urProgramBuild(Context, Program, nullptr)); + ASSERT_SUCCESS(urKernelCreate(Program, KernelName.data(), &Kernel)); + } - virtual void DestroyKernel() { - ASSERT_SUCCESS(urKernelRelease(Kernel)); - ASSERT_SUCCESS(urProgramRelease(Program)); - }; + virtual void setUpKernel() = 0; - virtual void Validate() = 0; + virtual void destroyKernel() { + ASSERT_SUCCESS(urKernelRelease(Kernel)); + ASSERT_SUCCESS(urProgramRelease(Program)); + }; - std::string Name; - ur_platform_handle_t Platform; - ur_context_handle_t Context; - ur_device_handle_t Device; - ur_program_handle_t Program; - ur_kernel_handle_t Kernel; + virtual void validate() = 0; + std::string Name; + ur_platform_handle_t Platform; + ur_context_handle_t Context; + ur_device_handle_t Device; + ur_program_handle_t Program; + ur_kernel_handle_t Kernel; }; struct TestSaxpyKernel : public TestKernel { - TestSaxpyKernel(ur_platform_handle_t Platform, ur_context_handle_t Context, ur_device_handle_t Device) - : TestKernel("saxpy_usm", Platform, Context, - Device) {} + TestSaxpyKernel(ur_platform_handle_t Platform, ur_context_handle_t Context, + ur_device_handle_t Device) + : TestKernel("saxpy_usm", Platform, Context, Device) {} + + ~TestSaxpyKernel() override = default; - ~TestSaxpyKernel() override = default; + void setUpKernel() override { - void SetUpKernel() override { + ASSERT_NO_FATAL_FAILURE(buildKernel()); - ASSERT_NO_FATAL_FAILURE(BuildKernel()); + const size_t AllocationSize = sizeof(uint32_t) * GlobalSize; + 
for (auto &SharedPtr : Memory) { + ASSERT_SUCCESS(urUSMSharedAlloc(Context, Device, nullptr, nullptr, + AllocationSize, &SharedPtr)); + ASSERT_NE(SharedPtr, nullptr); - const size_t allocation_size = sizeof(uint32_t) * global_size; - for (auto &shared_ptr : shared_ptrs) { - ASSERT_SUCCESS(urUSMSharedAlloc(Context, Device, nullptr, nullptr, - allocation_size, &shared_ptr)); - ASSERT_NE(shared_ptr, nullptr); + std::vector pattern(AllocationSize); + uur::generateMemFillPattern(pattern); + std::memcpy(SharedPtr, pattern.data(), AllocationSize); + } - std::vector pattern(allocation_size); - uur::generateMemFillPattern(pattern); - std::memcpy(shared_ptr, pattern.data(), allocation_size); + // Index 0 is the output + ASSERT_SUCCESS(urKernelSetArgPointer(Kernel, 0, nullptr, Memory[0])); + // Index 1 is A + ASSERT_SUCCESS(urKernelSetArgValue(Kernel, 1, sizeof(A), nullptr, &A)); + // Index 2 is X + ASSERT_SUCCESS(urKernelSetArgPointer(Kernel, 2, nullptr, Memory[1])); + // Index 3 is Y + ASSERT_SUCCESS(urKernelSetArgPointer(Kernel, 3, nullptr, Memory[2])); } - // Index 0 is output - ASSERT_SUCCESS( - urKernelSetArgPointer(Kernel, 0, nullptr, shared_ptrs[0])); - // Index 1 is A - ASSERT_SUCCESS(urKernelSetArgValue(Kernel, 1, sizeof(A), nullptr, &A)); - // Index 2 is X - ASSERT_SUCCESS( - urKernelSetArgPointer(Kernel, 2, nullptr, shared_ptrs[1])); - // Index 3 is Y - ASSERT_SUCCESS( - urKernelSetArgPointer(Kernel, 3, nullptr, shared_ptrs[2])); - } - - void DestroyKernel() override { - for (auto &shared_ptr : shared_ptrs) { - if (shared_ptr) { - EXPECT_SUCCESS(urUSMFree(Context, shared_ptr)); - } + void destroyKernel() override { + for (auto &shared_ptr : Memory) { + if (shared_ptr) { + EXPECT_SUCCESS(urUSMFree(Context, shared_ptr)); + } + } + ASSERT_NO_FATAL_FAILURE(TestKernel::destroyKernel()); } - ASSERT_NO_FATAL_FAILURE(TestKernel::DestroyKernel()); - } - - void Validate() override { - // TODO Test that no fatal failure works when the validation fails - for (size_t i = 0; i 
< global_size; i++) { - uint32_t result = A * X[i] + Y[i]; - ASSERT_EQ(result, output[i]); + + void validate() override { + auto *output = static_cast(Memory[0]); + auto *X = static_cast(Memory[1]); + auto *Y = static_cast(Memory[2]); + + for (size_t i = 0; i < GlobalSize; i++) { + uint32_t result = A * X[i] + Y[i]; + ASSERT_EQ(result, output[i]); + } } - } - - const size_t local_size = 4; - const size_t global_size = 32; - const size_t global_offset = 0; - const size_t n_dimensions = 1; - const uint32_t A = 42; - - std::array shared_ptrs = {nullptr, nullptr, nullptr, nullptr}; - uint32_t *output = (uint32_t *) shared_ptrs[0]; - uint32_t *X = (uint32_t *) shared_ptrs[1]; - uint32_t *Y = (uint32_t *) shared_ptrs[2]; + + const size_t LocalSize = 4; + const size_t GlobalSize = 32; + const size_t GlobalOffset = 0; + const size_t NDimensions = 1; + const uint32_t A = 42; + + std::array Memory = {nullptr, nullptr, nullptr}; }; struct TestFill2DKernel : public TestKernel { - TestFill2DKernel(ur_platform_handle_t Platform, ur_context_handle_t Context, ur_device_handle_t Device) - : TestKernel("fill_usm_2d", Platform, Context, - Device) {} + TestFill2DKernel(ur_platform_handle_t Platform, ur_context_handle_t Context, + ur_device_handle_t Device) + : TestKernel("fill_usm_2d", Platform, Context, Device) {} + + ~TestFill2DKernel() override = default; + + void setUpKernel() override { + ASSERT_NO_FATAL_FAILURE(buildKernel()); + + const size_t allocation_size = sizeof(uint32_t) * SizeX * SizeY; + ASSERT_SUCCESS(urUSMSharedAlloc(Context, Device, nullptr, nullptr, + allocation_size, &Memory)); + ASSERT_NE(Memory, nullptr); + + std::vector pattern(allocation_size); + uur::generateMemFillPattern(pattern); + std::memcpy(Memory, pattern.data(), allocation_size); + + UpdatePointerDesc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + &Memory, // pArgValue + }; + + UpdateValDesc = { + 
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(Val), // argSize + nullptr, // pProperties + &Val, // hArgValue + }; + + UpdateDesc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + Kernel, // hNewKernel + 0, // numNewMemObjArgs + 1, // numNewPointerArgs + 1, // numNewValueArgs + NDimensions, // newWorkDim + nullptr, // pNewMemObjArgList + &UpdatePointerDesc, // pNewPointerArgList + &UpdateValDesc, // pNewValueArgList + GlobalOffset.data(), // pNewGlobalWorkOffset + GlobalSize.data(), // pNewGlobalWorkSize + LocalSize.data(), // pNewLocalWorkSize + }; + } - ~TestFill2DKernel() override = default; + void destroyKernel() override { + if (Memory) { + EXPECT_SUCCESS(urUSMFree(Context, Memory)); + } + ASSERT_NO_FATAL_FAILURE(TestKernel::destroyKernel()); + } - void SetUpKernel() override { - ASSERT_NO_FATAL_FAILURE(BuildKernel()); + void validate() override { + for (size_t i = 0; i < SizeX * SizeY; i++) { + ASSERT_EQ(static_cast(Memory)[i], Val); + } + } - const size_t allocation_size = sizeof(uint32_t) * global_size; - ASSERT_SUCCESS(urUSMSharedAlloc(Context, Device, nullptr, nullptr, - allocation_size, &Memory)); - ASSERT_NE(Memory, nullptr); + ur_exp_command_buffer_update_pointer_arg_desc_t UpdatePointerDesc; + ur_exp_command_buffer_update_value_arg_desc_t UpdateValDesc; + ur_exp_command_buffer_update_kernel_launch_desc_t UpdateDesc; - std::vector pattern(allocation_size); - uur::generateMemFillPattern(pattern); - std::memcpy(Memory, pattern.data(), allocation_size); - } + std::vector LocalSize = {4, 4}; + const size_t SizeX = 64; + const size_t SizeY = 64; + std::vector GlobalSize = {SizeX, SizeY}; + std::vector GlobalOffset = {0, 0}; + uint32_t NDimensions = 2; - void DestroyKernel() override { + void *Memory; + uint32_t Val = 42; +}; - if (Memory) { - EXPECT_SUCCESS(urUSMFree(Context, Memory)); +struct urCommandBufferKernelHandleUpdateTest + : 
uur::command_buffer::urUpdatableCommandBufferExpTest { + virtual void SetUp() override { + + UUR_RETURN_ON_FATAL_FAILURE(urUpdatableCommandBufferExpTest::SetUp()); + + ur_device_usm_access_capability_flags_t shared_usm_flags; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Shared USM is not supported."; + } + + SaxpyKernel = std::make_shared( + TestSaxpyKernel(platform, context, device)); + FillUSM2DKernel = std::make_shared( + TestFill2DKernel(platform, context, device)); + TestKernels.push_back(SaxpyKernel); + TestKernels.push_back(FillUSM2DKernel); + + for (auto &TestKernel : TestKernels) { + UUR_RETURN_ON_FATAL_FAILURE(TestKernel->setUpKernel()); + } } - ASSERT_NO_FATAL_FAILURE(TestKernel::DestroyKernel()); - } - - void Validate() override { - for (size_t i = 0; i < global_size; i++) { - ASSERT_EQ(static_cast(Memory)[i], Val); + virtual void TearDown() override { + for (auto &TestKernel : TestKernels) { + UUR_RETURN_ON_FATAL_FAILURE(TestKernel->destroyKernel()); + } + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpTest::TearDown()); } - } - size_t local_size = 4; - const size_t size_x = 64; - const size_t size_y = 64; - size_t global_size = size_x * size_y; - size_t global_offset = 0; - const size_t n_dimensions = 2; - - void *Memory; - const uint32_t Val = 42; + std::vector> TestKernels{}; + std::shared_ptr SaxpyKernel; + std::shared_ptr FillUSM2DKernel; }; -struct KernelHandleUpdateTestBase - : uur::command_buffer::urUpdatableCommandBufferExpTest { - virtual void SetUp() override { +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferKernelHandleUpdateTest); - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpTest::SetUp()); +TEST_P(urCommandBufferKernelHandleUpdateTest, Success) { - ur_device_usm_access_capability_flags_t shared_usm_flags; - ASSERT_SUCCESS( - uur::GetDeviceUSMSingleSharedSupport(device, 
shared_usm_flags)); - if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { - GTEST_SKIP() << "Shared USM is not supported."; - } + std::vector KernelAlternatives = { + FillUSM2DKernel->Kernel}; - SaxpyKernel = std::make_shared(TestSaxpyKernel(platform, context, device)); - FillUSM2DKernel = std::make_shared(TestFill2DKernel(platform, context, device)); - TestKernels.push_back(SaxpyKernel); - TestKernels.push_back(FillUSM2DKernel); + uur::raii::CommandBufferCommand CommandHandle; + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, SaxpyKernel->Kernel, SaxpyKernel->NDimensions, + &(SaxpyKernel->GlobalOffset), &(SaxpyKernel->GlobalSize), + &(SaxpyKernel->LocalSize), KernelAlternatives.size(), + KernelAlternatives.data(), 0, nullptr, nullptr, CommandHandle.ptr())); + ASSERT_NE(CommandHandle, nullptr); - for (auto &TestKernel : TestKernels) { - UUR_RETURN_ON_FATAL_FAILURE(TestKernel->SetUpKernel()); - } - } + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); - virtual void TearDown() override { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp( + CommandHandle, &FillUSM2DKernel->UpdateDesc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); - for (auto &TestKernel : TestKernels) { - UUR_RETURN_ON_FATAL_FAILURE(TestKernel->DestroyKernel()); - } + ASSERT_NO_FATAL_FAILURE(SaxpyKernel->validate()); + ASSERT_NO_FATAL_FAILURE(FillUSM2DKernel->validate()); +} - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpTest::TearDown()); - } +/* Test that updates to the command kernel handle are stored in the command handle */ +TEST_P(urCommandBufferKernelHandleUpdateTest, UpdateAgain) { + + std::vector KernelAlternatives = { + FillUSM2DKernel->Kernel}; + + uur::raii::CommandBufferCommand CommandHandle; + 
ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, SaxpyKernel->Kernel, SaxpyKernel->NDimensions, + &(SaxpyKernel->GlobalOffset), &(SaxpyKernel->GlobalSize), + &(SaxpyKernel->LocalSize), KernelAlternatives.size(), + KernelAlternatives.data(), 0, nullptr, nullptr, CommandHandle.ptr())); + ASSERT_NE(CommandHandle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp( + CommandHandle, &FillUSM2DKernel->UpdateDesc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ASSERT_NO_FATAL_FAILURE(SaxpyKernel->validate()); + ASSERT_NO_FATAL_FAILURE(FillUSM2DKernel->validate()); + + // If the Kernel was not stored properly in the command, then this could potentially fail since + // it would try to use the Saxpy kernel + FillUSM2DKernel->Val = 78; + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp( + CommandHandle, &FillUSM2DKernel->UpdateDesc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ASSERT_NO_FATAL_FAILURE(FillUSM2DKernel->validate()); +} - std::vector> TestKernels{}; - std::shared_ptr SaxpyKernel; - std::shared_ptr FillUSM2DKernel; -}; +TEST_P(urCommandBufferKernelHandleUpdateTest, KernelAlternativeNotRegistered) { + + uur::raii::CommandBufferCommand CommandHandle; + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, SaxpyKernel->Kernel, SaxpyKernel->NDimensions, + &(SaxpyKernel->GlobalOffset), &(SaxpyKernel->GlobalSize), + &(SaxpyKernel->LocalSize), 0, nullptr, 0, nullptr, nullptr, + CommandHandle.ptr())); + ASSERT_NE(CommandHandle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + + 
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_VALUE, + urCommandBufferUpdateKernelLaunchExp( + CommandHandle, &FillUSM2DKernel->UpdateDesc)); +} + +TEST_P(urCommandBufferKernelHandleUpdateTest, + RegisterInvalidKernelAlternative) { + + std::vector KernelAlternatives = {SaxpyKernel->Kernel}; -UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(KernelHandleUpdateTestBase); - -TEST_P(KernelHandleUpdateTestBase, KernelHandleUpdateTest) { - - std::vector KernelAlternatives = {FillUSM2DKernel->Kernel}; - - ur_exp_command_buffer_command_handle_t command_handle; - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - updatable_cmd_buf_handle, - SaxpyKernel->Kernel, - SaxpyKernel->n_dimensions, - &(SaxpyKernel->global_offset), - &(SaxpyKernel->global_size), - &(SaxpyKernel->local_size), - KernelAlternatives.size(), - KernelAlternatives.data(), - 0, - nullptr, - nullptr, - &command_handle)); - ASSERT_NE(command_handle, nullptr); - - ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); - - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - ASSERT_NO_FATAL_FAILURE(SaxpyKernel->Validate()); - - ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; - - new_input_descs[0] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 0, // argIndex - nullptr, // pProperties - &FillUSM2DKernel->Memory, // pArgValue - }; - - uint32_t new_A = 33; - ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype - nullptr, // pNext - 1, // argIndex - sizeof(new_A), // argSize - nullptr, // pProperties - &FillUSM2DKernel->Val, // hArgValue - }; - - // Update kernel inputs - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - 
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - FillUSM2DKernel->Kernel, - 0, // numNewMemObjArgs - 1, // numNewPointerArgs - 1, // numNewValueArgs - 2, // newWorkDim - nullptr, // pNewMemObjArgList - new_input_descs, // pNewPointerArgList - &new_A_desc, // pNewValueArgList - &FillUSM2DKernel->global_offset, // pNewGlobalWorkOffset - &FillUSM2DKernel->global_size, // pNewGlobalWorkSize - &FillUSM2DKernel->local_size, // pNewLocalWorkSize - }; - - ASSERT_SUCCESS( - urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - ASSERT_NO_FATAL_FAILURE(FillUSM2DKernel->Validate()); + ur_exp_command_buffer_command_handle_t CommandHandle; + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_VALUE, + urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, SaxpyKernel->Kernel, + SaxpyKernel->NDimensions, &(SaxpyKernel->GlobalOffset), + &(SaxpyKernel->GlobalSize), &(SaxpyKernel->LocalSize), + KernelAlternatives.size(), KernelAlternatives.data(), + 0, nullptr, nullptr, &CommandHandle)); } diff --git a/test/conformance/testing/include/uur/raii.h b/test/conformance/testing/include/uur/raii.h index e4f456ec36..894a66dfdd 100644 --- a/test/conformance/testing/include/uur/raii.h +++ b/test/conformance/testing/include/uur/raii.h @@ -108,6 +108,12 @@ using Program = Wrapper; using Kernel = Wrapper; using Queue = Wrapper; using Event = Wrapper; +using CommandBuffer = + Wrapper; +using CommandBufferCommand = + Wrapper; } // namespace raii } // namespace uur From d9d48cf147ccc49a7121a46049b01b8883a0253d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Wed, 4 Sep 2024 15:34:52 +0100 Subject: [PATCH 09/14] Fix mistakes in tests --- include/ur_api.h | 2 +- scripts/core/exp-command-buffer.yml | 2 +- source/adapters/cuda/command_buffer.cpp | 2 +- 
source/adapters/level_zero/v2/api.cpp | 1 + source/loader/layers/validation/ur_valddi.cpp | 4 - source/loader/ur_libapi.cpp | 2 +- source/ur_api.cpp | 2 +- .../exp_command_buffer_adapter_hip.match | 4 + ...command_buffer_adapter_level_zero_v2.match | 32 -- .../update/kernel_handle_update.cpp | 6 +- .../usm_fill_kernel_update.cpp | 357 ------------------ .../usm_saxpy_kernel_update.cpp | 354 ----------------- .../exp_enqueue_native/CMakeLists.txt | 8 +- 13 files changed, 17 insertions(+), 759 deletions(-) delete mode 100644 test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp delete mode 100644 test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp diff --git a/include/ur_api.h b/include/ur_api.h index 20d56c4f9c..d9ec083906 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -8392,7 +8392,7 @@ urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `phKernelAlternatives == NULL && numKernelAlternatives > 0` /// + `phKernelAlternatives != NULL && numKernelAlternatives == 0` -/// + `phKernelAlternatives` contains `hKernel` +/// + If `phKernelAlternatives` contains `hKernel` /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 5ebb3ddeb9..d3f5a95bc8 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -341,7 +341,7 @@ returns: - $X_RESULT_ERROR_INVALID_VALUE: - "`phKernelAlternatives == NULL && numKernelAlternatives > 0`" - "`phKernelAlternatives != NULL && numKernelAlternatives == 0`" - - "`phKernelAlternatives` contains `hKernel`" + - "If `phKernelAlternatives` contains `hKernel`" - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: - "`pSyncPointWaitList == NULL && 
numSyncPointsInWaitList > 0`" diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 90d2e17862..0a6f0015e8 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -879,7 +879,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_INVALID_OPERATION; } - if (auto NewWorkDim = pUpdateKernelLaunch->newWorkDim) { + if (pUpdateKernelLaunch->newWorkDim) { // Error If Local size and not global size if ((pUpdateKernelLaunch->pNewLocalWorkSize != nullptr) && diff --git a/source/adapters/level_zero/v2/api.cpp b/source/adapters/level_zero/v2/api.cpp index eba7359379..b54ae2d84a 100644 --- a/source/adapters/level_zero/v2/api.cpp +++ b/source/adapters/level_zero/v2/api.cpp @@ -511,6 +511,7 @@ ur_result_t urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numKernelAlternatives, ur_kernel_handle_t *phKernelAlternatives, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint, diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 1c6dbb1392..b05194bef1 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8105,10 +8105,6 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_ERROR_INVALID_VALUE; } - if (phKernelAlternatives` contains `hKernel) { - return UR_RESULT_ERROR_INVALID_VALUE; - } - if (pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; } diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 
89e4a3788c..36e61ba09d 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7527,7 +7527,7 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `phKernelAlternatives == NULL && numKernelAlternatives > 0` /// + `phKernelAlternatives != NULL && numKernelAlternatives == 0` -/// + `phKernelAlternatives` contains `hKernel` +/// + If `phKernelAlternatives` contains `hKernel` /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 967121dd31..0babfaf8ae 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -6383,7 +6383,7 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `phKernelAlternatives == NULL && numKernelAlternatives > 0` /// + `phKernelAlternatives != NULL && numKernelAlternatives == 0` -/// + `phKernelAlternatives` contains `hKernel` +/// + If `phKernelAlternatives` contains `hKernel` /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match index e69de29bb2..a39a452d04 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match @@ -0,0 +1,4 @@ +urCommandBufferKernelHandleUpdateTest.Success/AMD_HIP_BACKEND___{{.*}}_ +urCommandBufferKernelHandleUpdateTest.UpdateAgain/AMD_HIP_BACKEND___{{.*}}_ +urCommandBufferKernelHandleUpdateTest.KernelAlternativeNotRegistered/AMD_HIP_BACKEND___{{.*}}_ 
+urCommandBufferKernelHandleUpdateTest.RegisterInvalidKernelAlternative/AMD_HIP_BACKEND___{{.*}}_ diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match index f997810ca5..8a8eff0cf5 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match @@ -1,34 +1,7 @@ -BufferFillCommandTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -BufferFillCommandTest.UpdateGlobalSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -BufferFillCommandTest.SeparateUpdateCalls/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -BufferFillCommandTest.OverrideUpdate/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -BufferFillCommandTest.OverrideArgList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMFillCommandTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMFillCommandTest.UpdateBeforeEnqueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMMultipleFillCommandTest.UpdateAllKernels/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -BufferSaxpyKernelTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMSaxpyKernelTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMMultiSaxpyKernelTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMMultiSaxpyKernelTest.UpdateWithoutBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -NDRangeUpdateTest.Update3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -NDRangeUpdateTest.Update2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -NDRangeUpdateTest.Update1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-NDRangeUpdateTest.Invalid/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urCommandBufferReleaseExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urCommandBufferReleaseExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferReleaseCommandExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferReleaseCommandExpTest.ReleaseCmdBufBeforeHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferReleaseCommandExpTest.ReleaseCmdBufMultipleHandles/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferReleaseCommandExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urCommandBufferRetainExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urCommandBufferRetainExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferRetainCommandExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferRetainCommandExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -InvalidUpdateTest.NotFinalizedCommandBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -InvalidUpdateTest.NotUpdatableCommandBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -InvalidUpdateTest.GlobalLocalSizeMistach/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -InvalidUpdateTest.ImplToUserDefinedLocalSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -InvalidUpdateTest.UserToImplDefinedLocalSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urCommandBufferCommandsTest.urCommandBufferAppendUSMMemcpyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urCommandBufferCommandsTest.urCommandBufferAppendUSMFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
urCommandBufferCommandsTest.urCommandBufferAppendMemBufferCopyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ @@ -55,8 +28,3 @@ urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Z urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__8 urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__16 urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__32 -urCommandBufferKernelHandleUpdateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferKernelHandleUpdateTest.UpdateAgain/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferKernelHandleUpdateTest.KernelAlternativeNotRegistered/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferKernelHandleUpdateTest.RegisterInvalidKernelAlternative/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ - diff --git a/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp b/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp index 4aac942231..a533786917 100644 --- a/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp +++ b/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp @@ -7,9 +7,6 @@ #include "uur/raii.h" #include -// Tests that it is possible to update the kernel handle of a command-buffer node. -// This test launches a Saxpy kernel using a command-buffer and then updates the -// node with a completely different kernel that does a fill 2D operation. struct TestKernel { TestKernel(std::string Name, ur_platform_handle_t Platform, @@ -247,6 +244,9 @@ struct urCommandBufferKernelHandleUpdateTest UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferKernelHandleUpdateTest); +/* Tests that it is possible to update the kernel handle of a command-buffer node. 
+ * This test launches a Saxpy kernel using a command-buffer and then updates the + * node with a completely different kernel that does a fill 2D operation. */ TEST_P(urCommandBufferKernelHandleUpdateTest, Success) { std::vector KernelAlternatives = { diff --git a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp deleted file mode 100644 index 6a86f30cea..0000000000 --- a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp +++ /dev/null @@ -1,357 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -// See LICENSE.TXT -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "fixtures.h" -#include - -// Test that updating a command-buffer with a single kernel command -// taking USM arguments works correctly. -struct USMFillCommandTest - : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { - void SetUp() override { - program_name = "fill_usm"; - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpExecutionTest::SetUp()); - - ur_device_usm_access_capability_flags_t shared_usm_flags; - ASSERT_SUCCESS( - uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); - if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { - GTEST_SKIP() << "Shared USM is not supported."; - } - - // Allocate USM pointer to fill - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - allocation_size, &shared_ptr)); - ASSERT_NE(shared_ptr, nullptr); - std::memset(shared_ptr, 0, allocation_size); - - // Index 0 is output - ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, shared_ptr)); - // Index 1 is input scalar - ASSERT_SUCCESS( - urKernelSetArgValue(kernel, 1, sizeof(val), nullptr, &val)); - - // Append kernel command to command-buffer and close command-buffer - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - 
updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, - &command_handle)); - ASSERT_NE(command_handle, nullptr); - - ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); - } - - void Validate(uint32_t *pointer, size_t length, uint32_t val) { - for (size_t i = 0; i < length; i++) { - ASSERT_EQ(pointer[i], val); - } - } - - void TearDown() override { - if (shared_ptr) { - EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); - } - - if (new_shared_ptr) { - EXPECT_SUCCESS(urUSMFree(context, new_shared_ptr)); - } - - if (command_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); - } - - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpExecutionTest::TearDown()); - } - - static constexpr uint32_t val = 42; - static constexpr size_t local_size = 4; - static constexpr size_t global_size = 32; - static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; - static constexpr size_t allocation_size = sizeof(val) * global_size; - void *shared_ptr = nullptr; - void *new_shared_ptr = nullptr; - ur_exp_command_buffer_command_handle_t command_handle = nullptr; -}; - -UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMFillCommandTest); - -// Test using a different global size to fill and larger USM output buffer -TEST_P(USMFillCommandTest, UpdateParameters) { - // Run command-buffer prior to update an verify output - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - Validate((uint32_t *)shared_ptr, global_size, val); - - // Allocate a new USM pointer of larger size if feature is supported. 
- size_t new_global_size = global_size * 2; - const size_t new_allocation_size = sizeof(val) * new_global_size; - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - new_allocation_size, &new_shared_ptr)); - ASSERT_NE(new_shared_ptr, nullptr); - std::memset(new_shared_ptr, 0, new_allocation_size); - - // Set new USM pointer as kernel output at index 0 - ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 0, // argIndex - nullptr, // pProperties - &new_shared_ptr, // pArgValue - }; - - // Set new value to use for fill at kernel index 1 - uint32_t new_val = 33; - ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype - nullptr, // pNext - 1, // argIndex - sizeof(new_val), // argSize - nullptr, // pProperties - &new_val, // hArgValue - }; - - size_t new_local_size = local_size; - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - kernel, //hNewKernel - 0, // numNewMemObjArgs - 1, // numNewPointerArgs - 1, // numNewValueArgs - static_cast(n_dimensions), // newWorkDim - nullptr, // pNewMemObjArgList - &new_output_desc, // pNewPointerArgList - &new_input_desc, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - &new_global_size, // pNewGlobalWorkSize - &new_local_size, // pNewLocalWorkSize - }; - - // Update kernel and enqueue command-buffer again - ASSERT_SUCCESS( - urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - // Verify that update occurred correctly - Validate((uint32_t *)new_shared_ptr, new_global_size, new_val); -} - -// Test updating a command-buffer which hasn't been enqueued yet 
-TEST_P(USMFillCommandTest, UpdateBeforeEnqueue) { - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - allocation_size, &new_shared_ptr)); - ASSERT_NE(new_shared_ptr, nullptr); - std::memset(new_shared_ptr, 0, allocation_size); - - // Set new USM pointer as kernel output at index 0 - ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 0, // argIndex - nullptr, // pProperties - &new_shared_ptr, // pArgValue - }; - - // Set new value to use for fill at kernel index 1 - uint32_t new_val = 33; - ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype - nullptr, // pNext - 1, // argIndex - sizeof(new_val), // argSize - nullptr, // pProperties - &new_val, // hArgValue - }; - - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - kernel, //hNewKernel - 0, // numNewMemObjArgs - 1, // numNewPointerArgs - 1, // numNewValueArgs - 0, // newWorkDim - nullptr, // pNewMemObjArgList - &new_output_desc, // pNewPointerArgList - &new_input_desc, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize - }; - - // Update kernel and enqueue command-buffer - ASSERT_SUCCESS( - urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - // Verify that update occurred correctly - Validate((uint32_t *)new_shared_ptr, global_size, new_val); -} - -// Test updating a command-buffer with multiple USM fill kernel commands -struct USMMultipleFillCommandTest - : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { - void SetUp() override { - program_name = 
"fill_usm"; - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpExecutionTest::SetUp()); - - ur_device_usm_access_capability_flags_t shared_usm_flags; - ASSERT_SUCCESS( - uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); - if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { - GTEST_SKIP() << "Shared USM is not supported."; - } - - // Create a single USM allocation which will be used by all kernels - // by accessing at pointer offsets - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - allocation_size, &shared_ptr)); - ASSERT_NE(shared_ptr, nullptr); - std::memset(shared_ptr, 0, allocation_size); - - // Append multiple kernel commands to command-buffer - for (size_t k = 0; k < num_kernels; k++) { - // Calculate offset into output allocation, and set as - // kernel output. - void *offset_ptr = (uint32_t *)shared_ptr + (k * elements); - ASSERT_SUCCESS( - urKernelSetArgPointer(kernel, 0, nullptr, offset_ptr)); - - // Each kernel has a unique fill value - uint32_t fill_val = val + k; - ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(fill_val), - nullptr, &fill_val)); - - // Append kernel and store returned handle - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &elements, &local_size, 0, nullptr, 0, nullptr, nullptr, - &command_handles[k])); - ASSERT_NE(command_handles[k], nullptr); - } - - ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); - } - - void Validate(uint32_t *pointer, size_t length, uint32_t val) { - for (size_t i = 0; i < length; i++) { - ASSERT_EQ(pointer[i], val); - } - } - - void TearDown() override { - if (shared_ptr) { - EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); - } - - if (new_shared_ptr) { - EXPECT_SUCCESS(urUSMFree(context, new_shared_ptr)); - } - - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpExecutionTest::TearDown()); - } - - static constexpr uint32_t val = 42; - 
static constexpr size_t local_size = 4; - static constexpr size_t global_size = 64; - static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; - static constexpr size_t allocation_size = sizeof(val) * global_size; - static constexpr size_t num_kernels = 8; - static constexpr size_t elements = global_size / num_kernels; - - void *shared_ptr = nullptr; - void *new_shared_ptr = nullptr; - std::array - command_handles; -}; - -UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMMultipleFillCommandTest); - -// Test updating all the kernels commands in the command-buffer -TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { - // Run command-buffer prior to update an verify output - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - uint32_t *output = (uint32_t *)shared_ptr; - for (size_t i = 0; i < global_size; i++) { - const uint32_t expected = val + (i / elements); - ASSERT_EQ(expected, output[i]); - } - - // Create a new USM allocation to update kernel outputs to - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - allocation_size, &new_shared_ptr)); - ASSERT_NE(new_shared_ptr, nullptr); - std::memset(new_shared_ptr, 0, allocation_size); - - // Update each kernel in the command-buffer. 
- uint32_t new_val = 33; - for (size_t k = 0; k < num_kernels; k++) { - // Update output pointer to an offset into new USM allocation - void *offset_ptr = (uint32_t *)new_shared_ptr + (k * elements); - ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 0, // argIndex - nullptr, // pProperties - &offset_ptr, // pArgValue - }; - - // Update fill value - uint32_t new_fill_val = new_val + k; - ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype - nullptr, // pNext - 1, // argIndex - sizeof(int), // argSize - nullptr, // pProperties - &new_fill_val, // hArgValue - }; - - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - kernel, //hNewKernel - 0, // numNewMemObjArgs - 1, // numNewPointerArgs - 1, // numNewValueArgs - 0, // newWorkDim - nullptr, // pNewMemObjArgList - &new_output_desc, // pNewPointerArgList - &new_input_desc, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize - }; - - ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handles[k], - &update_desc)); - } - - // Update kernel and enqueue command-buffer again - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - // Verify that update occurred correctly - uint32_t *updated_output = (uint32_t *)new_shared_ptr; - for (size_t i = 0; i < global_size; i++) { - uint32_t expected = new_val + (i / elements); - ASSERT_EQ(expected, updated_output[i]) << i; - } -} diff --git a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp deleted file mode 100644 index 
ea32f7e046..0000000000 --- a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp +++ /dev/null @@ -1,354 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -// See LICENSE.TXT -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "fixtures.h" -#include - -// Test that updating a command-buffer with a single kernel command -// taking USM & scalar arguments works correctly. - -struct USMSaxpyKernelTestBase - : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { - virtual void SetUp() override { - program_name = "saxpy_usm"; - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpExecutionTest::SetUp()); - - ur_device_usm_access_capability_flags_t shared_usm_flags; - ASSERT_SUCCESS( - uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); - if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { - GTEST_SKIP() << "Shared USM is not supported."; - } - - const size_t allocation_size = sizeof(uint32_t) * global_size; - for (auto &shared_ptr : shared_ptrs) { - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - allocation_size, &shared_ptr)); - ASSERT_NE(shared_ptr, nullptr); - - std::vector pattern(allocation_size); - uur::generateMemFillPattern(pattern); - std::memcpy(shared_ptr, pattern.data(), allocation_size); - } - - // Index 0 is output - ASSERT_SUCCESS( - urKernelSetArgPointer(kernel, 0, nullptr, shared_ptrs[0])); - // Index 1 is A - ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(A), nullptr, &A)); - // Index 2 is X - ASSERT_SUCCESS( - urKernelSetArgPointer(kernel, 2, nullptr, shared_ptrs[1])); - // Index 3 is Y - ASSERT_SUCCESS( - urKernelSetArgPointer(kernel, 3, nullptr, shared_ptrs[2])); - } - - void Validate(uint32_t *output, uint32_t *X, uint32_t *Y, uint32_t A, - size_t length) { - for (size_t i = 0; i < length; i++) { - uint32_t result = A * X[i] + Y[i]; - ASSERT_EQ(result, 
output[i]); - } - } - - virtual void TearDown() override { - for (auto &shared_ptr : shared_ptrs) { - if (shared_ptr) { - EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); - } - } - - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpExecutionTest::TearDown()); - } - - static constexpr size_t local_size = 4; - static constexpr size_t global_size = 32; - static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; - static constexpr uint32_t A = 42; - std::array shared_ptrs = {nullptr, nullptr, nullptr, nullptr}; -}; - -struct USMSaxpyKernelTest : USMSaxpyKernelTestBase { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE(USMSaxpyKernelTestBase::SetUp()); - - // Append kernel command to command-buffer and close command-buffer - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, - &command_handle)); - ASSERT_NE(command_handle, nullptr); - - ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); - } - - void TearDown() override { - if (command_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); - } - - UUR_RETURN_ON_FATAL_FAILURE(USMSaxpyKernelTestBase::TearDown()); - } - - ur_exp_command_buffer_command_handle_t command_handle = nullptr; -}; - -UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMSaxpyKernelTest); - -TEST_P(USMSaxpyKernelTest, UpdateParameters) { - // Run command-buffer prior to update an verify output - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - uint32_t *output = (uint32_t *)shared_ptrs[0]; - uint32_t *X = (uint32_t *)shared_ptrs[1]; - uint32_t *Y = (uint32_t *)shared_ptrs[2]; - Validate(output, X, Y, A, global_size); - - // Update inputs - ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; - - // New X at index 2 - new_input_descs[0] = { - 
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 2, // argIndex - nullptr, // pProperties - &shared_ptrs[3], // pArgValue - }; - - // New Y at index 3 - new_input_descs[1] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 3, // argIndex - nullptr, // pProperties - &shared_ptrs[4], // pArgValue - }; - - // New A at index 1 - uint32_t new_A = 33; - ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype - nullptr, // pNext - 1, // argIndex - sizeof(new_A), // argSize - nullptr, // pProperties - &new_A, // hArgValue - }; - - // Update kernel inputs - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - kernel, //hNewKernel - 0, // numNewMemObjArgs - 2, // numNewPointerArgs - 1, // numNewValueArgs - 0, // newWorkDim - nullptr, // pNewMemObjArgList - new_input_descs, // pNewPointerArgList - &new_A_desc, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize - }; - - // Update kernel and enqueue command-buffer again - ASSERT_SUCCESS( - urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - // Verify that update occurred correctly - uint32_t *new_output = (uint32_t *)shared_ptrs[0]; - uint32_t *new_X = (uint32_t *)shared_ptrs[3]; - uint32_t *new_Y = (uint32_t *)shared_ptrs[4]; - Validate(new_output, new_X, new_Y, new_A, global_size); -} - -struct USMMultiSaxpyKernelTest : USMSaxpyKernelTestBase { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE(USMSaxpyKernelTestBase::SetUp()); - - // Append kernel command to command-buffer and close command-buffer - for (unsigned node = 
0; node < nodes; node++) { - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, - &command_handles[node])); - ASSERT_NE(command_handles[node], nullptr); - } - - ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); - } - - void TearDown() override { - for (auto &handle : command_handles) { - if (handle) { - EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(handle)); - } - } - UUR_RETURN_ON_FATAL_FAILURE(USMSaxpyKernelTestBase::TearDown()); - } - - static constexpr size_t nodes = 1024; - static constexpr uint32_t A = 42; - std::array command_handles{}; -}; - -UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMMultiSaxpyKernelTest); - -TEST_P(USMMultiSaxpyKernelTest, UpdateParameters) { - // Run command-buffer prior to update an verify output - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - uint32_t *output = (uint32_t *)shared_ptrs[0]; - uint32_t *X = (uint32_t *)shared_ptrs[1]; - uint32_t *Y = (uint32_t *)shared_ptrs[2]; - Validate(output, X, Y, A, global_size); - - // Update inputs - ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; - - // New X at index 2 - new_input_descs[0] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 2, // argIndex - nullptr, // pProperties - &shared_ptrs[3], // pArgValue - }; - - // New Y at index 3 - new_input_descs[1] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 3, // argIndex - nullptr, // pProperties - &shared_ptrs[4], // pArgValue - }; - - // New A at index 1 - uint32_t new_A = 33; - ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype - nullptr, // pNext - 1, // argIndex - sizeof(new_A), // argSize - nullptr, // 
pProperties - &new_A, // hArgValue - }; - - // Update kernel inputs - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - kernel, //hNewKernel - 0, // numNewMemObjArgs - 2, // numNewPointerArgs - 1, // numNewValueArgs - 0, // newWorkDim - nullptr, // pNewMemObjArgList - new_input_descs, // pNewPointerArgList - &new_A_desc, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize - }; - - // Update kernel and enqueue command-buffer again - for (auto &handle : command_handles) { - ASSERT_SUCCESS( - urCommandBufferUpdateKernelLaunchExp(handle, &update_desc)); - } - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - // Verify that update occurred correctly - uint32_t *new_output = (uint32_t *)shared_ptrs[0]; - uint32_t *new_X = (uint32_t *)shared_ptrs[3]; - uint32_t *new_Y = (uint32_t *)shared_ptrs[4]; - Validate(new_output, new_X, new_Y, new_A, global_size); -} - -TEST_P(USMMultiSaxpyKernelTest, UpdateWithoutBlocking) { - // Prepare new inputs - ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; - - // New X at index 2 - new_input_descs[0] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 2, // argIndex - nullptr, // pProperties - &shared_ptrs[3], // pArgValue - }; - - // New Y at index 3 - new_input_descs[1] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 3, // argIndex - nullptr, // pProperties - &shared_ptrs[4], // pArgValue - }; - - // New A at index 1 - uint32_t new_A = 33; - ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype - nullptr, // pNext - 1, // argIndex - sizeof(new_A), // argSize - nullptr, // 
pProperties - &new_A, // hArgValue - }; - - // Update kernel inputs - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - kernel, //hNewKernel - 0, // numNewMemObjArgs - 2, // numNewPointerArgs - 1, // numNewValueArgs - 0, // newWorkDim - nullptr, // pNewMemObjArgList - new_input_descs, // pNewPointerArgList - &new_A_desc, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize - }; - - // Run command-buffer prior to update without doing a blocking wait after - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - - // Update kernel and enqueue command-buffer again - for (auto &handle : command_handles) { - ASSERT_SUCCESS( - urCommandBufferUpdateKernelLaunchExp(handle, &update_desc)); - } - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - // Verify that update occurred correctly - uint32_t *new_output = (uint32_t *)shared_ptrs[0]; - uint32_t *new_X = (uint32_t *)shared_ptrs[3]; - uint32_t *new_Y = (uint32_t *)shared_ptrs[4]; - Validate(new_output, new_X, new_Y, new_A, global_size); -} diff --git a/test/conformance/exp_enqueue_native/CMakeLists.txt b/test/conformance/exp_enqueue_native/CMakeLists.txt index 64f885fb94..403d3caa3c 100644 --- a/test/conformance/exp_enqueue_native/CMakeLists.txt +++ b/test/conformance/exp_enqueue_native/CMakeLists.txt @@ -5,12 +5,12 @@ if (UR_BUILD_ADAPTER_CUDA) add_conformance_test_with_kernels_environment( - exp_enqueue_native - enqueue_native_cuda.cpp + exp_enqueue_native + enqueue_native_cuda.cpp ) target_include_directories(test-exp_enqueue_native PRIVATE - ${PROJECT_SOURCE_DIR}/source - ${PROJECT_SOURCE_DIR}/source/adapters/cuda + ${PROJECT_SOURCE_DIR}/source + ${PROJECT_SOURCE_DIR}/source/adapters/cuda ) 
target_link_libraries(test-exp_enqueue_native PRIVATE cudadrv) endif() From 14bc901ba1555e972d5d1ac859a9fe9ac880a726 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 9 Sep 2024 15:02:59 +0100 Subject: [PATCH 10/14] Add hip support and create capabilities flag --- include/ur_api.h | 39 +++- include/ur_print.h | 8 + include/ur_print.hpp | 115 +++++++++++- scripts/core/EXP-COMMAND-BUFFER.rst | 13 +- scripts/core/exp-command-buffer.yml | 38 +++- source/adapters/cuda/command_buffer.cpp | 153 ++++++++++----- source/adapters/cuda/command_buffer.hpp | 5 +- source/adapters/cuda/device.cpp | 11 +- source/adapters/hip/command_buffer.cpp | 176 ++++++++++++------ source/adapters/hip/command_buffer.hpp | 10 +- source/adapters/hip/device.cpp | 32 +++- source/adapters/level_zero/device.cpp | 50 +++-- .../level_zero/ur_interface_loader.hpp | 1 + source/adapters/mock/ur_mockddi.cpp | 5 +- source/adapters/native_cpu/device.cpp | 4 +- source/adapters/opencl/command_buffer.cpp | 7 +- source/adapters/opencl/common.cpp | 44 +++-- source/adapters/opencl/common.hpp | 5 +- source/adapters/opencl/device.cpp | 21 ++- source/loader/layers/tracing/ur_trcddi.cpp | 5 +- source/loader/layers/validation/ur_valddi.cpp | 5 +- source/loader/loader.def.in | 1 + source/loader/loader.map.in | 1 + source/loader/ur_ldrddi.cpp | 5 +- source/loader/ur_libapi.cpp | 8 +- source/loader/ur_print.cpp | 8 + source/ur_api.cpp | 8 +- ...xp_command_buffer_adapter_native_cpu.match | 1 + .../conformance/exp_command_buffer/fixtures.h | 82 +++----- .../update/buffer_fill_kernel_update.cpp | 16 +- .../update/buffer_saxpy_kernel_update.cpp | 2 +- .../update/invalid_update.cpp | 39 +++- .../update/kernel_handle_update.cpp | 130 +++++++++++-- .../update/ndrange_update.cpp | 8 +- .../update/usm_fill_kernel_update.cpp | 6 +- .../update/usm_saxpy_kernel_update.cpp | 6 +- .../exp_enqueue_native/CMakeLists.txt | 4 +- tools/urinfo/urinfo.hpp | 4 +- 38 files changed, 789 insertions(+), 287 deletions(-) diff 
--git a/include/ur_api.h b/include/ur_api.h index d9ec083906..c162434fdc 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -1629,8 +1629,8 @@ typedef enum ur_device_info_t { ///< `EnqueueDeviceGlobalVariableRead` entry points. UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000, ///< [::ur_bool_t] Returns true if the device supports the use of ///< command-buffers. - UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP = 0x1001, ///< [::ur_bool_t] Returns true if the device supports updating the kernel - ///< commands in a command-buffer. + UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP = 0x1001, ///< [::ur_device_command_buffer_update_capability_flags_t] Command-buffer + ///< update capabilities of the device UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP = 0x1111, ///< [::ur_bool_t] return true if enqueue Cluster Launch is supported UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP = 0x2000, ///< [::ur_bool_t] returns true if the device supports the creation of ///< bindless images @@ -8155,6 +8155,27 @@ urBindlessImagesSignalExternalSemaphoreExp( #if !defined(__GNUC__) #pragma region command_buffer_(experimental) #endif +/////////////////////////////////////////////////////////////////////////////// +/// @brief Device kernel execution capability +typedef uint32_t ur_device_command_buffer_update_capability_flags_t; +typedef enum ur_device_command_buffer_update_capability_flag_t { + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS = UR_BIT(0), ///< Device supports updating the kernel arguments in command-buffer + ///< commands. + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE = UR_BIT(1), ///< Device supports updating the local work-group size in command-buffer + ///< commands. + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE = UR_BIT(2), ///< Device supports updating the global work-group size in command-buffer + ///< commands. 
+ UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET = UR_BIT(3), ///< Device supports updating the global work offset in command-buffer + ///< commands. + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE = UR_BIT(4), ///< Device supports updating the kernel handle in command-buffer commands. + /// @cond + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_device_command_buffer_update_capability_flag_t; +/// @brief Bit Mask for validating ur_device_command_buffer_update_capability_flags_t +#define UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAGS_MASK 0xffffffe0 + /////////////////////////////////////////////////////////////////////////////// /// @brief Command-buffer query information type typedef enum ur_exp_command_buffer_info_t { @@ -8208,7 +8229,7 @@ typedef struct ur_exp_command_buffer_update_memobj_arg_desc_t { ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC const void *pNext; ///< [in][optional] pointer to extension-specific structure uint32_t argIndex; ///< [in] Argument index. - const ur_kernel_arg_mem_obj_properties_t *pProperties; ///< [in][optinal] Pointer to memory object properties. + const ur_kernel_arg_mem_obj_properties_t *pProperties; ///< [in][optional] Pointer to memory object properties. ur_mem_handle_t hNewMemObjArg; ///< [in][optional] Handle of memory object to set at argument index. } ur_exp_command_buffer_update_memobj_arg_desc_t; @@ -8220,7 +8241,7 @@ typedef struct ur_exp_command_buffer_update_pointer_arg_desc_t { ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC const void *pNext; ///< [in][optional] pointer to extension-specific structure uint32_t argIndex; ///< [in] Argument index. - const ur_kernel_arg_pointer_properties_t *pProperties; ///< [in][optinal] Pointer to USM pointer properties. + const ur_kernel_arg_pointer_properties_t *pProperties; ///< [in][optional] Pointer to USM pointer properties. 
const void *pNewPointerArg; ///< [in][optional] USM pointer to memory location holding the argument ///< value to set at argument index. @@ -8234,7 +8255,7 @@ typedef struct ur_exp_command_buffer_update_value_arg_desc_t { const void *pNext; ///< [in][optional] pointer to extension-specific structure uint32_t argIndex; ///< [in] Argument index. uint32_t argSize; ///< [in] Argument size. - const ur_kernel_arg_value_properties_t *pProperties; ///< [in][optinal] Pointer to value properties. + const ur_kernel_arg_value_properties_t *pProperties; ///< [in][optional] Pointer to value properties. const void *pNewValueArg; ///< [in][optional] Argument value representing matching kernel arg type to ///< set at argument index. @@ -8411,8 +8432,9 @@ urCommandBufferAppendKernelLaunchExp( ///< phKernelAlternatives. ur_kernel_handle_t *phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. It's invalid to specify - ///< the default kernel `hKernel` as part of this list. + ///< command after the command-buffer is finalized. The default kernel + ///< `hKernel` is implicitly marked as an alternative. It's + ///< invalid to specify it as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. @@ -8928,8 +8950,9 @@ urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. 
+/// + If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero. +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. /// + If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. /// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size. /// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size. 
/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP diff --git a/include/ur_print.h b/include/ur_print.h index c70e661fb1..54082d5330 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -970,6 +970,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpExternalSemaphoreDesc(const struct /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintExpImageCopyRegion(const struct ur_exp_image_copy_region_t params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_device_command_buffer_update_capability_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintDeviceCommandBufferUpdateCapabilityFlags(enum ur_device_command_buffer_update_capability_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_command_buffer_info_t enum /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 6bf77e4023..1408a7dea7 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -197,6 +197,8 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_command_buffer_info_t value, size_t size); @@ -335,6 +337,7 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_external_mem_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_external_semaphore_desc_t params); inline std::ostream 
&operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_image_copy_region_t params); +inline std::ostream &operator<<(std::ostream &os, enum ur_device_command_buffer_update_capability_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_command_buffer_info_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_command_buffer_command_info_t value); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_desc_t params); @@ -2541,8 +2544,8 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP"; break; - case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: - os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP"; + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: + os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP"; break; case UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP: os << "UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP"; @@ -4049,15 +4052,16 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ")"; } break; - case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { - const ur_bool_t *tptr = (const ur_bool_t *)ptr; - if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { + const ur_device_command_buffer_update_capability_flags_t *tptr = (const ur_device_command_buffer_update_capability_flags_t *)ptr; + if (sizeof(ur_device_command_buffer_update_capability_flags_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_command_buffer_update_capability_flags_t) << ")"; return UR_RESULT_ERROR_INVALID_SIZE; } os << (const void *)(tptr) << " ("; - os << *tptr; + ur::details::printFlag(os, + *tptr); os << ")"; } break; @@ -9669,6 +9673,103 @@ inline 
std::ostream &operator<<(std::ostream &os, const struct ur_exp_image_copy return os; } /////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_command_buffer_update_capability_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_device_command_buffer_update_capability_flag_t value) { + switch (value) { + case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS: + os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS"; + break; + case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE: + os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE"; + break; + case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE: + os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE"; + break; + case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET: + os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET"; + break; + case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE: + os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_device_command_buffer_update_capability_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS) { + val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS; + } + + if ((val & 
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE) { + val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE; + } + + if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE) { + val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE; + } + + if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET) { + val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET; + } + + if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE) { + val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_command_buffer_info_t type /// @returns /// std::ostream & diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst 
b/scripts/core/EXP-COMMAND-BUFFER.rst index 94df623481..78e7337397 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -167,8 +167,9 @@ Updating Command-Buffer Commands An adapter implementing the command-buffer experimental feature can optionally support updating the configuration of kernel commands recorded to a -command-buffer. Support for this is reported by returning true in the -${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP query. +command-buffer. The attributes of kernel commands that can be updated are +device specific and can be queried using the +${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP query. Updating kernel commands is done by passing the new kernel configuration to ${x}CommandBufferUpdateKernelLaunchExp along with the command handle of @@ -259,7 +260,13 @@ Enums ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * ${x}_device_info_t * ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP - * ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP + * ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP +* ${x}_device_command_buffer_update_capability_flags_t + * UPDATE_KERNEL_ARGUMENTS + * LOCAL_WORK_SIZE + * GLOBAL_WORK_SIZE + * GLOBAL_WORK_OFFSET + * KERNEL_HANDLE * ${x}_result_t * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index d3f5a95bc8..6cfd193479 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -21,9 +21,31 @@ etors: - name: COMMAND_BUFFER_SUPPORT_EXP value: "0x1000" desc: "[$x_bool_t] Returns true if the device supports the use of command-buffers." 
- - name: COMMAND_BUFFER_UPDATE_SUPPORT_EXP + - name: COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP + desc: "[$x_device_command_buffer_update_capability_flags_t] Command-buffer update capabilities of the device" value: "0x1001" - desc: "[$x_bool_t] Returns true if the device supports updating the kernel commands in a command-buffer." +--- #-------------------------------------------------------------------------- +type: enum +desc: "Device kernel execution capability" +class: $xDevice +name: $x_device_command_buffer_update_capability_flags_t +etors: + - name: KERNEL_ARGUMENTS + value: "$X_BIT(0)" + desc: "Device supports updating the kernel arguments in command-buffer commands." + - name: LOCAL_WORK_SIZE + value: "$X_BIT(1)" + desc: "Device supports updating the local work-group size in command-buffer commands." + - name: GLOBAL_WORK_SIZE + value: "$X_BIT(2)" + desc: "Device supports updating the global work-group size in command-buffer commands." + - name: GLOBAL_WORK_OFFSET + value: "$X_BIT(3)" + desc: "Device supports updating the global work offset in command-buffer commands." + - name: KERNEL_HANDLE + value: "$X_BIT(4)" + desc: "Device supports updating the kernel handle in command-buffer commands." + --- #-------------------------------------------------------------------------- type: enum extend: true @@ -127,7 +149,7 @@ members: desc: "[in] Argument index." - type: "const ur_kernel_arg_mem_obj_properties_t *" name: pProperties - desc: "[in][optinal] Pointer to memory object properties." + desc: "[in][optional] Pointer to memory object properties." - type: $x_mem_handle_t name: hNewMemObjArg desc: "[in][optional] Handle of memory object to set at argument index." @@ -142,7 +164,7 @@ members: desc: "[in] Argument index." - type: "const ur_kernel_arg_pointer_properties_t *" name: pProperties - desc: "[in][optinal] Pointer to USM pointer properties." + desc: "[in][optional] Pointer to USM pointer properties." 
- type: "const void *" name: pNewPointerArg desc: "[in][optional] USM pointer to memory location holding the argument value to set at argument index." @@ -160,7 +182,7 @@ members: desc: "[in] Argument size." - type: "const ur_kernel_arg_value_properties_t *" name: pProperties - desc: "[in][optinal] Pointer to value properties." + desc: "[in][optional] Pointer to value properties." - type: "const void *" name: pNewValueArg desc: "[in][optional] Argument value representing matching kernel arg type to set at argument index." @@ -319,7 +341,8 @@ params: name: "phKernelAlternatives" desc: | [in][optional][range(0, numKernelAlternatives)] List of kernels handles that might be used to update the kernel in this - command after the command-buffer is finalized. It's invalid to specify the default kernel `hKernel` as part of this list. + command after the command-buffer is finalized. The default kernel `hKernel` is implicitly marked as an alternative. It's + invalid to specify it as part of this list. - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -931,8 +954,9 @@ returns: - $X_RESULT_ERROR_INVALID_OPERATION: - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." - "If the command-buffer `hCommand` belongs to has not been finalized." + - "If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero." + - "If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`." - "If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL." 
- - "If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`." - "If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size." - "If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size." - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 0a6f0015e8..1305bae515 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -864,12 +864,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( return commandHandleReleaseInternal(hCommand); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( - ur_exp_command_buffer_command_handle_t hCommand, - const ur_exp_command_buffer_update_kernel_launch_desc_t - *pUpdateKernelLaunch) { +/** + * Validates contents of the update command description. + * @param[in] Command The command which is being updated. + * @param[in] UpdateCommandDesc The update command description. 
+ * @return UR_RESULT_SUCCESS or an error code on failure + */ +ur_result_t +validateCommandDesc(ur_exp_command_buffer_command_handle_t Command, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *UpdateCommandDesc) { + + auto CommandBuffer = Command->CommandBuffer; + // Update requires command-buffer to be finalized - ur_exp_command_buffer_handle_t CommandBuffer = hCommand->CommandBuffer; if (!CommandBuffer->CudaGraphExec) { return UR_RESULT_ERROR_INVALID_OPERATION; } @@ -879,38 +887,61 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_INVALID_OPERATION; } - if (pUpdateKernelLaunch->newWorkDim) { + const uint32_t NewWorkDim = UpdateCommandDesc->newWorkDim; + if (!NewWorkDim && Command->Kernel != UpdateCommandDesc->hNewKernel) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + if (NewWorkDim) { + UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + + if (NewWorkDim != Command->WorkDim && + Command->Kernel == UpdateCommandDesc->hNewKernel) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } // Error If Local size and not global size - if ((pUpdateKernelLaunch->pNewLocalWorkSize != nullptr) && - (pUpdateKernelLaunch->pNewGlobalWorkSize == nullptr)) { + if ((UpdateCommandDesc->pNewLocalWorkSize != nullptr) && + (UpdateCommandDesc->pNewGlobalWorkSize == nullptr)) { return UR_RESULT_ERROR_INVALID_OPERATION; } // Error if local size non-nullptr and created with null // or if local size nullptr and created with non-null const bool IsNewLocalSizeNull = - pUpdateKernelLaunch->pNewLocalWorkSize == nullptr; - const bool IsOriginalLocalSizeNull = hCommand->isNullLocalSize(); + UpdateCommandDesc->pNewLocalWorkSize == nullptr; + const bool IsOriginalLocalSizeNull = Command->isNullLocalSize(); if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { return UR_RESULT_ERROR_INVALID_OPERATION; } } - // Kernel corresponding to the command to 
update - ur_kernel_handle_t NewKernel = pUpdateKernelLaunch->hNewKernel; - - if (hCommand->ValidKernelHandles.count(NewKernel)) { - hCommand->Kernel = NewKernel; - } else { + if (!Command->ValidKernelHandles.count(UpdateCommandDesc->hNewKernel)) { return UR_RESULT_ERROR_INVALID_VALUE; } + return UR_RESULT_SUCCESS; +} + +/** + * Updates the arguments of CommandDesc->hNewKernel + * @param[in] Device The device associated with the kernel being updated. + * @param[in] UpdateCommandDesc The update command description that contains the + * new kernel and its arguments. + * @return UR_RESULT_SUCCESS or an error code on failure + */ +ur_result_t +updateKernelArguments(ur_device_handle_t Device, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *UpdateCommandDesc) { + + ur_kernel_handle_t NewKernel = UpdateCommandDesc->hNewKernel; // Update pointer arguments to the kernel - uint32_t NumPointerArgs = pUpdateKernelLaunch->numNewPointerArgs; + uint32_t NumPointerArgs = UpdateCommandDesc->numNewPointerArgs; const ur_exp_command_buffer_update_pointer_arg_desc_t *ArgPointerList = - pUpdateKernelLaunch->pNewPointerArgList; + UpdateCommandDesc->pNewPointerArgList; for (uint32_t i = 0; i < NumPointerArgs; i++) { const auto &PointerArgDesc = ArgPointerList[i]; uint32_t ArgIndex = PointerArgDesc.argIndex; @@ -926,9 +957,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( } // Update memobj arguments to the kernel - uint32_t NumMemobjArgs = pUpdateKernelLaunch->numNewMemObjArgs; + uint32_t NumMemobjArgs = UpdateCommandDesc->numNewMemObjArgs; const ur_exp_command_buffer_update_memobj_arg_desc_t *ArgMemobjList = - pUpdateKernelLaunch->pNewMemObjArgList; + UpdateCommandDesc->pNewMemObjArgList; for (uint32_t i = 0; i < NumMemobjArgs; i++) { const auto &MemobjArgDesc = ArgMemobjList[i]; uint32_t ArgIndex = MemobjArgDesc.argIndex; @@ -939,8 +970,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( if (ArgValue == nullptr) { 
NewKernel->setKernelArg(ArgIndex, 0, nullptr); } else { - CUdeviceptr CuPtr = - std::get(ArgValue->Mem).getPtr(CommandBuffer->Device); + CUdeviceptr CuPtr = std::get(ArgValue->Mem).getPtr(Device); NewKernel->setKernelArg(ArgIndex, sizeof(CUdeviceptr), (void *)&CuPtr); } } catch (ur_result_t Err) { @@ -950,9 +980,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( } // Update value arguments to the kernel - uint32_t NumValueArgs = pUpdateKernelLaunch->numNewValueArgs; + uint32_t NumValueArgs = UpdateCommandDesc->numNewValueArgs; const ur_exp_command_buffer_update_value_arg_desc_t *ArgValueList = - pUpdateKernelLaunch->pNewValueArgList; + UpdateCommandDesc->pNewValueArgList; for (uint32_t i = 0; i < NumValueArgs; i++) { const auto &ValueArgDesc = ArgValueList[i]; uint32_t ArgIndex = ValueArgDesc.argIndex; @@ -960,7 +990,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( const void *ArgValue = ValueArgDesc.pNewValueArg; ur_result_t Result = UR_RESULT_SUCCESS; - try { NewKernel->setKernelArg(ArgIndex, ArgSize, ArgValue); } catch (ur_result_t Err) { @@ -969,45 +998,68 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( } } - // Set the updated ND range - const uint32_t NewWorkDim = pUpdateKernelLaunch->newWorkDim; - if (NewWorkDim != 0) { - UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - hCommand->WorkDim = NewWorkDim; + return UR_RESULT_SUCCESS; +} + +/** + * Updates the command buffer command with new values from the update + * description. + * @param[in] Command The command to be updated. + * @param[in] UpdateCommandDesc The update command description. 
+ * @return UR_RESULT_SUCCESS or an error code on failure + */ +ur_result_t +updateCommand(ur_exp_command_buffer_command_handle_t Command, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *UpdateCommandDesc) { + + Command->Kernel = UpdateCommandDesc->hNewKernel; + + if (UpdateCommandDesc->newWorkDim) { + Command->WorkDim = UpdateCommandDesc->newWorkDim; } - if (pUpdateKernelLaunch->pNewGlobalWorkOffset) { - hCommand->setGlobalOffset(pUpdateKernelLaunch->pNewGlobalWorkOffset); + if (UpdateCommandDesc->pNewGlobalWorkOffset) { + Command->setGlobalOffset(UpdateCommandDesc->pNewGlobalWorkOffset); } - if (pUpdateKernelLaunch->pNewGlobalWorkSize) { - hCommand->setGlobalSize(pUpdateKernelLaunch->pNewGlobalWorkSize); + if (UpdateCommandDesc->pNewGlobalWorkSize) { + Command->setGlobalSize(UpdateCommandDesc->pNewGlobalWorkSize); } - if (pUpdateKernelLaunch->pNewLocalWorkSize) { - hCommand->setLocalSize(pUpdateKernelLaunch->pNewLocalWorkSize); + if (UpdateCommandDesc->pNewLocalWorkSize) { + Command->setLocalSize(UpdateCommandDesc->pNewLocalWorkSize); } - size_t *GlobalWorkOffset = hCommand->GlobalWorkOffset; - size_t *GlobalWorkSize = hCommand->GlobalWorkSize; + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { - // If no worksize is provided make sure we pass nullptr to setKernelParams so + ur_exp_command_buffer_handle_t CommandBuffer = hCommand->CommandBuffer; + + UR_CHECK_ERROR(validateCommandDesc(hCommand, pUpdateKernelLaunch)); + UR_CHECK_ERROR( + updateKernelArguments(CommandBuffer->Device, pUpdateKernelLaunch)); + UR_CHECK_ERROR(updateCommand(hCommand, pUpdateKernelLaunch)); + + // If no work-size is provided make sure we pass nullptr to setKernelParams so // it can guess the local work size. 
const bool ProvidedLocalSize = !hCommand->isNullLocalSize(); size_t *LocalWorkSize = ProvidedLocalSize ? hCommand->LocalWorkSize : nullptr; - uint32_t WorkDim = hCommand->WorkDim; // Set the number of threads per block to the number of threads per warp - // by default unless user has provided a better number + // by default unless user has provided a better number. size_t ThreadsPerBlock[3] = {32u, 1u, 1u}; size_t BlocksPerGrid[3] = {1u, 1u, 1u}; - CUfunction CuFunc = NewKernel->get(); - ur_context_handle_t Context = CommandBuffer->Context; - ur_device_handle_t Device = CommandBuffer->Device; - auto Result = setKernelParams(Context, Device, WorkDim, GlobalWorkOffset, - GlobalWorkSize, LocalWorkSize, NewKernel, - CuFunc, ThreadsPerBlock, BlocksPerGrid); + CUfunction CuFunc = hCommand->Kernel->get(); + auto Result = setKernelParams( + CommandBuffer->Context, CommandBuffer->Device, hCommand->WorkDim, + hCommand->GlobalWorkOffset, hCommand->GlobalWorkSize, LocalWorkSize, + hCommand->Kernel, CuFunc, ThreadsPerBlock, BlocksPerGrid); if (Result != UR_RESULT_SUCCESS) { return Result; } @@ -1021,8 +1073,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( Params.blockDimX = ThreadsPerBlock[0]; Params.blockDimY = ThreadsPerBlock[1]; Params.blockDimZ = ThreadsPerBlock[2]; - Params.sharedMemBytes = NewKernel->getLocalSize(); - Params.kernelParams = const_cast(NewKernel->getArgIndices().data()); + Params.sharedMemBytes = hCommand->Kernel->getLocalSize(); + Params.kernelParams = + const_cast(hCommand->Kernel->getArgIndices().data()); CUgraphNode Node = hCommand->Node; CUgraphExec CudaGraphExec = CommandBuffer->CudaGraphExec; diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index 49e3ba8b25..a936bad72f 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -99,11 +99,10 @@ struct ur_exp_command_buffer_command_handle_t_ { ur_exp_command_buffer_handle_t 
CommandBuffer; - /* The currently active kernel handle for this command */ + // The currently active kernel handle for this command. ur_kernel_handle_t Kernel; - /* Set of all the kernel handles that can be used when updating this command - */ + // Set of all the kernel handles that can be used when updating this command. std::unordered_set ValidKernelHandles; CUgraphNode Node; diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index bbaaa27cdb..7daf8bdbc8 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1093,8 +1093,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: - case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + /*case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP:*/ return ReturnValue(true); + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { + ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE; + return ReturnValue(UpdateCapabilities); + } case UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP: { int Value = getAttribute(hDevice, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR) >= 9; diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index ef6e6fe83c..bc533c564d 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -48,9 +48,9 @@ commandHandleReleaseInternal(ur_exp_command_buffer_command_handle_t Command) { ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( ur_context_handle_t hContext, ur_device_handle_t 
hDevice, bool IsUpdatable) - : Context(hContext), Device(hDevice), - IsUpdatable(IsUpdatable), HIPGraph{nullptr}, HIPGraphExec{nullptr}, - RefCountInternal{1}, RefCountExternal{1}, NextSyncPoint{0} { + : Context(hContext), Device(hDevice), IsUpdatable(IsUpdatable), + HIPGraph{nullptr}, HIPGraphExec{nullptr}, RefCountInternal{1}, + RefCountExternal{1}, NextSyncPoint{0} { urContextRetain(hContext); urDeviceRetain(hDevice); } @@ -78,7 +78,8 @@ ur_exp_command_buffer_command_handle_t_:: ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, hipGraphNode_t Node, hipKernelNodeParams Params, uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, - const size_t *LocalWorkSizePtr) + const size_t *LocalWorkSizePtr, uint32_t NumKernelAlternatives, + ur_kernel_handle_t *KernelAlternatives) : CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), Params(Params), WorkDim(WorkDim), RefCountInternal(1), RefCountExternal(1) { CommandBuffer->incrementInternalReferenceCount(); @@ -98,6 +99,13 @@ ur_exp_command_buffer_command_handle_t_:: std::memset(GlobalWorkOffset + WorkDim, 0, ZeroSize); std::memset(GlobalWorkSize + WorkDim, 0, ZeroSize); } + + /* Add the default Kernel as a valid kernel handle for this command */ + ValidKernelHandles.insert(Kernel); + if (KernelAlternatives) { + ValidKernelHandles.insert(KernelAlternatives, + KernelAlternatives + NumKernelAlternatives); + } } /// Helper function for finding the HIP Nodes associated with the commands in a @@ -312,8 +320,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, - uint32_t /*numKernelAlternatives*/, - ur_kernel_handle_t * /*phKernelAlternatives*/, + uint32_t numKernelAlternatives, ur_kernel_handle_t *phKernelAlternatives, uint32_t numSyncPointsInWaitList, const 
ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint, @@ -390,8 +397,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } auto NewCommand = new ur_exp_command_buffer_command_handle_t_{ - hCommandBuffer, hKernel, GraphNode, NodeParams, - workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize}; + hCommandBuffer, hKernel, GraphNode, + NodeParams, workDim, pGlobalWorkOffset, + pGlobalWorkSize, pLocalWorkSize, numKernelAlternatives, + phKernelAlternatives}; NewCommand->incrementInternalReferenceCount(); hCommandBuffer->CommandHandles.push_back(NewCommand); @@ -834,12 +843,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( return commandHandleReleaseInternal(hCommand); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( - ur_exp_command_buffer_command_handle_t hCommand, - const ur_exp_command_buffer_update_kernel_launch_desc_t - *pUpdateKernelLaunch) { +/** + * Validates contents of the update command description. + * @param[in] Command The command which is being updated. + * @param[in] UpdateCommandDesc The update command description. 
+ * @return UR_RESULT_SUCCESS or an error code on failure + */ +ur_result_t +validateCommandDesc(ur_exp_command_buffer_command_handle_t Command, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *UpdateCommandDesc) { + + auto CommandBuffer = Command->CommandBuffer; + // Update requires command-buffer to be finalized - ur_exp_command_buffer_handle_t CommandBuffer = hCommand->CommandBuffer; if (!CommandBuffer->HIPGraphExec) { return UR_RESULT_ERROR_INVALID_OPERATION; } @@ -849,53 +866,78 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_INVALID_OPERATION; } - if (auto NewWorkDim = pUpdateKernelLaunch->newWorkDim) { - // Error if work dim changes - if (NewWorkDim != hCommand->WorkDim) { + const uint32_t NewWorkDim = UpdateCommandDesc->newWorkDim; + if (!NewWorkDim && Command->Kernel != UpdateCommandDesc->hNewKernel) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + if (NewWorkDim) { + UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + + if (NewWorkDim != Command->WorkDim && + Command->Kernel == UpdateCommandDesc->hNewKernel) { return UR_RESULT_ERROR_INVALID_OPERATION; } // Error If Local size and not global size - if ((pUpdateKernelLaunch->pNewLocalWorkSize != nullptr) && - (pUpdateKernelLaunch->pNewGlobalWorkSize == nullptr)) { + if ((UpdateCommandDesc->pNewLocalWorkSize != nullptr) && + (UpdateCommandDesc->pNewGlobalWorkSize == nullptr)) { return UR_RESULT_ERROR_INVALID_OPERATION; } // Error if local size non-nullptr and created with null // or if local size nullptr and created with non-null const bool IsNewLocalSizeNull = - pUpdateKernelLaunch->pNewLocalWorkSize == nullptr; - const bool IsOriginalLocalSizeNull = hCommand->isNullLocalSize(); + UpdateCommandDesc->pNewLocalWorkSize == nullptr; + const bool IsOriginalLocalSizeNull = Command->isNullLocalSize(); if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { return 
UR_RESULT_ERROR_INVALID_OPERATION; } } - // Kernel corresponding to the command to update - ur_kernel_handle_t Kernel = hCommand->Kernel; - ur_device_handle_t Device = CommandBuffer->Device; + if (!Command->ValidKernelHandles.count(UpdateCommandDesc->hNewKernel)) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + + return UR_RESULT_SUCCESS; +} + +/** + * Updates the arguments of CommandDesc->hNewKernel + * @param[in] Device The device associated with the kernel being updated. + * @param[in] UpdateCommandDesc The update command description that contains the + * new kernel and its arguments. + * @return UR_RESULT_SUCCESS or an error code on failure + */ +ur_result_t +updateKernelArguments(ur_device_handle_t Device, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *UpdateCommandDesc) { + + ur_kernel_handle_t NewKernel = UpdateCommandDesc->hNewKernel; // Update pointer arguments to the kernel - uint32_t NumPointerArgs = pUpdateKernelLaunch->numNewPointerArgs; + uint32_t NumPointerArgs = UpdateCommandDesc->numNewPointerArgs; const ur_exp_command_buffer_update_pointer_arg_desc_t *ArgPointerList = - pUpdateKernelLaunch->pNewPointerArgList; + UpdateCommandDesc->pNewPointerArgList; for (uint32_t i = 0; i < NumPointerArgs; i++) { const auto &PointerArgDesc = ArgPointerList[i]; uint32_t ArgIndex = PointerArgDesc.argIndex; const void *ArgValue = PointerArgDesc.pNewPointerArg; try { - Kernel->setKernelArg(ArgIndex, sizeof(ArgValue), ArgValue); + NewKernel->setKernelArg(ArgIndex, sizeof(ArgValue), ArgValue); } catch (ur_result_t Err) { return Err; } } // Update memobj arguments to the kernel - uint32_t NumMemobjArgs = pUpdateKernelLaunch->numNewMemObjArgs; + uint32_t NumMemobjArgs = UpdateCommandDesc->numNewMemObjArgs; const ur_exp_command_buffer_update_memobj_arg_desc_t *ArgMemobjList = - pUpdateKernelLaunch->pNewMemObjArgList; + UpdateCommandDesc->pNewMemObjArgList; for (uint32_t i = 0; i < NumMemobjArgs; i++) { const auto &MemobjArgDesc = ArgMemobjList[i]; uint32_t 
ArgIndex = MemobjArgDesc.argIndex; @@ -903,10 +945,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( try { if (ArgValue == nullptr) { - Kernel->setKernelArg(ArgIndex, 0, nullptr); + NewKernel->setKernelArg(ArgIndex, 0, nullptr); } else { void *HIPPtr = std::get(ArgValue->Mem).getVoid(Device); - Kernel->setKernelArg(ArgIndex, sizeof(void *), (void *)&HIPPtr); + NewKernel->setKernelArg(ArgIndex, sizeof(void *), (void *)&HIPPtr); } } catch (ur_result_t Err) { return Err; @@ -914,9 +956,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( } // Update value arguments to the kernel - uint32_t NumValueArgs = pUpdateKernelLaunch->numNewValueArgs; + uint32_t NumValueArgs = UpdateCommandDesc->numNewValueArgs; const ur_exp_command_buffer_update_value_arg_desc_t *ArgValueList = - pUpdateKernelLaunch->pNewValueArgList; + UpdateCommandDesc->pNewValueArgList; for (uint32_t i = 0; i < NumValueArgs; i++) { const auto &ValueArgDesc = ArgValueList[i]; uint32_t ArgIndex = ValueArgDesc.argIndex; @@ -924,49 +966,74 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( const void *ArgValue = ValueArgDesc.pNewValueArg; try { - Kernel->setKernelArg(ArgIndex, ArgSize, ArgValue); + NewKernel->setKernelArg(ArgIndex, ArgSize, ArgValue); } catch (ur_result_t Err) { return Err; } } - // Set the updated ND range - const uint32_t NewWorkDim = pUpdateKernelLaunch->newWorkDim; - if (NewWorkDim != 0) { - UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - hCommand->WorkDim = NewWorkDim; + return UR_RESULT_SUCCESS; +} + +/** + * Updates the command buffer command with new values from the update + * description. + * @param[in] Command The command to be updated. + * @param[in] UpdateCommandDesc The update command description. 
+ * @return UR_RESULT_SUCCESS or an error code on failure + */ +ur_result_t +updateCommand(ur_exp_command_buffer_command_handle_t Command, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *UpdateCommandDesc) { + + Command->Kernel = UpdateCommandDesc->hNewKernel; + + if (UpdateCommandDesc->newWorkDim) { + Command->WorkDim = UpdateCommandDesc->newWorkDim; } - if (pUpdateKernelLaunch->pNewGlobalWorkOffset) { - hCommand->setGlobalOffset(pUpdateKernelLaunch->pNewGlobalWorkOffset); + if (UpdateCommandDesc->pNewGlobalWorkOffset) { + Command->setGlobalOffset(UpdateCommandDesc->pNewGlobalWorkOffset); } - if (pUpdateKernelLaunch->pNewGlobalWorkSize) { - hCommand->setGlobalSize(pUpdateKernelLaunch->pNewGlobalWorkSize); + if (UpdateCommandDesc->pNewGlobalWorkSize) { + Command->setGlobalSize(UpdateCommandDesc->pNewGlobalWorkSize); } - if (pUpdateKernelLaunch->pNewLocalWorkSize) { - hCommand->setLocalSize(pUpdateKernelLaunch->pNewLocalWorkSize); + if (UpdateCommandDesc->pNewLocalWorkSize) { + Command->setLocalSize(UpdateCommandDesc->pNewLocalWorkSize); } - size_t *GlobalWorkOffset = hCommand->GlobalWorkOffset; - size_t *GlobalWorkSize = hCommand->GlobalWorkSize; + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { + + ur_exp_command_buffer_handle_t CommandBuffer = hCommand->CommandBuffer; + + UR_CHECK_ERROR(validateCommandDesc(hCommand, pUpdateKernelLaunch)); + UR_CHECK_ERROR( + updateKernelArguments(CommandBuffer->Device, pUpdateKernelLaunch)); + UR_CHECK_ERROR(updateCommand(hCommand, pUpdateKernelLaunch)); // If no worksize is provided make sure we pass nullptr to setKernelParams so // it can guess the local work size. const bool ProvidedLocalSize = !hCommand->isNullLocalSize(); size_t *LocalWorkSize = ProvidedLocalSize ?
hCommand->LocalWorkSize : nullptr; - uint32_t WorkDim = hCommand->WorkDim; // Set the number of threads per block to the number of threads per warp // by default unless user has provided a better number size_t ThreadsPerBlock[3] = {32u, 1u, 1u}; size_t BlocksPerGrid[3] = {1u, 1u, 1u}; - hipFunction_t HIPFunc = Kernel->get(); - UR_CHECK_ERROR(setKernelParams(Device, WorkDim, GlobalWorkOffset, - GlobalWorkSize, LocalWorkSize, Kernel, HIPFunc, - ThreadsPerBlock, BlocksPerGrid)); + hipFunction_t HIPFunc = hCommand->Kernel->get(); + UR_CHECK_ERROR(setKernelParams( + CommandBuffer->Device, hCommand->WorkDim, hCommand->GlobalWorkOffset, + hCommand->GlobalWorkSize, LocalWorkSize, hCommand->Kernel, HIPFunc, + ThreadsPerBlock, BlocksPerGrid)); hipKernelNodeParams &Params = hCommand->Params; @@ -977,8 +1044,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( Params.blockDim.x = ThreadsPerBlock[0]; Params.blockDim.y = ThreadsPerBlock[1]; Params.blockDim.z = ThreadsPerBlock[2]; - Params.sharedMemBytes = Kernel->getLocalSize(); - Params.kernelParams = const_cast(Kernel->getArgIndices().data()); + Params.sharedMemBytes = hCommand->Kernel->getLocalSize(); + Params.kernelParams = + const_cast(hCommand->Kernel->getArgIndices().data()); hipGraphNode_t Node = hCommand->Node; hipGraphExec_t HipGraphExec = CommandBuffer->HIPGraphExec; diff --git a/source/adapters/hip/command_buffer.hpp b/source/adapters/hip/command_buffer.hpp index d744a3544d..c14fe50ff4 100644 --- a/source/adapters/hip/command_buffer.hpp +++ b/source/adapters/hip/command_buffer.hpp @@ -15,6 +15,7 @@ #include "context.hpp" #include #include +#include // Trace an internal UR call #define UR_TRACE(Call) \ @@ -43,7 +44,8 @@ struct ur_exp_command_buffer_command_handle_t_ { ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, hipGraphNode_t Node, hipKernelNodeParams Params, uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, - const size_t 
*LocalWorkSizePtr); + const size_t *LocalWorkSizePtr, uint32_t NumKernelAlternatives, + ur_kernel_handle_t *KernelAlternatives); void setGlobalOffset(const size_t *GlobalWorkOffsetPtr) { const size_t CopySize = sizeof(size_t) * WorkDim; @@ -95,7 +97,13 @@ struct ur_exp_command_buffer_command_handle_t_ { } ur_exp_command_buffer_handle_t CommandBuffer; + + // The currently active kernel handle for this command. ur_kernel_handle_t Kernel; + + // Set of all the kernel handles that can be used when updating this command. + std::unordered_set ValidKernelHandles; + hipGraphNode_t Node; hipKernelNodeParams Params; diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index 3ae98e929d..b3b211af5a 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -905,17 +905,33 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: - case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + /*case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: */ { + int DriverVersion = 0; + UR_CHECK_ERROR(hipDriverGetVersion(&DriverVersion)); + + // Return supported for the UR command-buffer experimental feature on + // ROCM 5.5.1 and later. This is to workaround HIP driver bug + // https://github.com/ROCm/HIP/issues/2450 in older versions. + // + // The version is returned as (10000000 major + 1000000 minor + patch). + const int CmdBufDriverMinVersion = 50530202; // ROCM 5.5.1 + return ReturnValue(DriverVersion >= CmdBufDriverMinVersion); + } + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { int DriverVersion = 0; UR_CHECK_ERROR(hipDriverGetVersion(&DriverVersion)); - - // Return supported for the UR command-buffer experimental feature on - // ROCM 5.5.1 and later. This is to workaround HIP driver bug - // https://github.com/ROCm/HIP/issues/2450 in older versions. 
- // - // The version is returned as (10000000 major + 1000000 minor + patch). const int CmdBufDriverMinVersion = 50530202; // ROCM 5.5.1 - return ReturnValue(DriverVersion >= CmdBufDriverMinVersion); + if (DriverVersion < CmdBufDriverMinVersion) { + return ReturnValue( + static_cast(0)); + } + ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE; + return ReturnValue(UpdateCapabilities); } default: break; diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index e6cb650420..507695ec91 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -994,20 +994,44 @@ ur_result_t urDeviceGetInfo( } case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: return ReturnValue(true); - case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { - // Update support requires being able to update kernel arguments and all - // aspects of the kernel NDRange. - const ze_mutable_command_exp_flags_t UpdateMask = - ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS | + // case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + // // Update support requires being able to update kernel arguments and + // all + // // aspects of the kernel NDRange. 
+ // const ze_mutable_command_exp_flags_t UpdateMask = + // ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS | + // ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT | + // ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE | + // ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET; + // + // const bool KernelArgUpdateSupport = + // (Device->ZeDeviceMutableCmdListsProperties->mutableCommandFlags & + // UpdateMask) == UpdateMask; + // return ReturnValue(KernelArgUpdateSupport && + // Device->Platform->ZeMutableCmdListExt.Supported); + // } + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { + const ze_mutable_command_exp_flags_t ZeMutableCommandFlags = + Device->ZeDeviceMutableCmdListsProperties->mutableCommandFlags; + + ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = 0; + if (ZeMutableCommandFlags & ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS) { + UpdateCapabilities |= + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS; + } + ze_mutable_command_exp_flags_t ReqUpdateWG = ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT | - ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE | - ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET; - - const bool KernelArgUpdateSupport = - (Device->ZeDeviceMutableCmdListsProperties->mutableCommandFlags & - UpdateMask) == UpdateMask; - return ReturnValue(KernelArgUpdateSupport && - Device->Platform->ZeMutableCmdListExt.Supported); + ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE; + if ((ZeMutableCommandFlags & ReqUpdateWG) == ReqUpdateWG) { + UpdateCapabilities |= + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE; + } + if (ZeMutableCommandFlags & ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET) { + UpdateCapabilities |= + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET; + } + return ReturnValue(UpdateCapabilities); } case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: { // On L0 bindless images are supported.
diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp index f95625dd5b..2b163f6749 100644 --- a/source/adapters/level_zero/ur_interface_loader.hpp +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -557,6 +557,7 @@ ur_result_t urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numKernelAlternatives, ur_kernel_handle_t *phKernelAlternatives, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint, diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index 876e895322..714bf7817c 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -8356,8 +8356,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. It's invalid to specify - ///< the default kernel `hKernel` as part of this list. + ///< command after the command-buffer is finalized. The default kernel + ///< `hKernel` is implicitly marked as an alternative. It's + ///< invalid to specify it as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index c5652398e3..bed0898d04 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -388,8 +388,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(false); case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: - case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: return ReturnValue(false); + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: + return ReturnValue( + static_cast(0)); case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP: return ReturnValue(false); diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 34cb7f1a3c..571d14f5d8 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -71,10 +71,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( const bool IsUpdatable = pCommandBufferDesc ? 
pCommandBufferDesc->isUpdatable : false; - bool DeviceSupportsUpdate = false; + ur_device_command_buffer_update_capability_flags_t UpdateCapabilities; cl_device_id CLDevice = cl_adapter::cast(hDevice); - CL_RETURN_ON_FAILURE(deviceSupportsURCommandBufferKernelUpdate( - CLDevice, DeviceSupportsUpdate)); + CL_RETURN_ON_FAILURE( + getDeviceCommandBufferUpdateCapabilities(CLDevice, UpdateCapabilities)); + bool DeviceSupportsUpdate = UpdateCapabilities > 0; if (IsUpdatable && !DeviceSupportsUpdate) { return UR_RESULT_ERROR_INVALID_OPERATION; diff --git a/source/adapters/opencl/common.cpp b/source/adapters/opencl/common.cpp index 03775fb87d..13219cd6a7 100644 --- a/source/adapters/opencl/common.cpp +++ b/source/adapters/opencl/common.cpp @@ -116,8 +116,10 @@ ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle) { return UR_RESULT_SUCCESS; } -cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev, - bool &Result) { +cl_int getDeviceCommandBufferUpdateCapabilities( + cl_device_id Dev, + ur_device_command_buffer_update_capability_flags_t &UpdateCapabilities) { + size_t ExtSize = 0; CL_RETURN_ON_FAILURE( clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); @@ -129,21 +131,37 @@ cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev, std::string SupportedExtensions(ExtStr.c_str()); if (ExtStr.find("cl_khr_command_buffer_mutable_dispatch") == std::string::npos) { - Result = false; + UpdateCapabilities = 0; return CL_SUCCESS; } - // All the CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR capabilities must - // be supported by a device for UR update. 
- cl_mutable_dispatch_fields_khr mutable_capabilities; + cl_mutable_dispatch_fields_khr MutableCapabilities; CL_RETURN_ON_FAILURE(clGetDeviceInfo( Dev, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, - sizeof(mutable_capabilities), &mutable_capabilities, nullptr)); - const cl_mutable_dispatch_fields_khr required_caps = - CL_MUTABLE_DISPATCH_ARGUMENTS_KHR | - CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR | - CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR | CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR | - CL_MUTABLE_DISPATCH_EXEC_INFO_KHR; - Result = (mutable_capabilities & required_caps) == required_caps; + sizeof(MutableCapabilities), &MutableCapabilities, nullptr)); + + if (!(MutableCapabilities & CL_MUTABLE_DISPATCH_EXEC_INFO_KHR)) { + UpdateCapabilities = 0; + return CL_SUCCESS; + } + + UpdateCapabilities = 0; + if (MutableCapabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR) { + UpdateCapabilities |= + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS; + } + if (MutableCapabilities & CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR) { + UpdateCapabilities |= + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE; + } + if (MutableCapabilities & CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR) { + UpdateCapabilities |= + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE; + } + if (MutableCapabilities & CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR) { + UpdateCapabilities |= + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET; + } + return CL_SUCCESS; } diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index 18b08bf095..66e1b3bc03 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -417,5 +417,6 @@ ur_result_t mapCLErrorToUR(cl_int Result); ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle); -cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev, - bool &Result); +cl_int getDeviceCommandBufferUpdateCapabilities( + cl_device_id Dev, + 
ur_device_command_buffer_update_capability_flags_t &UpdateCapabilities); diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 071a3a7c5a..d8e0fc4e1f 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -1065,12 +1065,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(ExtStr.find("cl_khr_command_buffer") != std::string::npos); } - case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + // case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + // cl_device_id Dev = cl_adapter::cast(hDevice); + // ur_device_command_buffer_update_capability_flags_t UpdateCapabilities; + // CL_RETURN_ON_FAILURE( + // deviceSupportsURCommandBufferKernelUpdate(Dev, + // UpdateCapabilities)); + // ur_device_command_buffer_update_capability_flags_t + // RequiredCapabilities = + // UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS | + // UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_WORKGROUP; + // return ReturnValue((UpdateCapabilities & RequiredCapabilities) == + // RequiredCapabilities); + // } + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { cl_device_id Dev = cl_adapter::cast(hDevice); - bool Supported = false; + ur_device_command_buffer_update_capability_flags_t UpdateCapabilities; CL_RETURN_ON_FAILURE( - deviceSupportsURCommandBufferKernelUpdate(Dev, Supported)); - return ReturnValue(Supported); + getDeviceCommandBufferUpdateCapabilities(Dev, UpdateCapabilities)); + return ReturnValue(UpdateCapabilities); } default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index f21320b830..a3f48fd533 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -6500,8 +6500,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t * 
phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. It's invalid to specify - ///< the default kernel `hKernel` as part of this list. + ///< command after the command-buffer is finalized. The default kernel + ///< `hKernel` is implicitly marked as an alternative. It's + ///< invalid to specify it as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index b05194bef1..542dfc3be5 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8061,8 +8061,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. It's invalid to specify - ///< the default kernel `hKernel` as part of this list. + ///< command after the command-buffer is finalized. The default kernel + ///< `hKernel` is implicitly marked as an alternative. It's + ///< invalid to specify it as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * diff --git a/source/loader/loader.def.in b/source/loader/loader.def.in index 5e628b4faf..63a5f1843d 100644 --- a/source/loader/loader.def.in +++ b/source/loader/loader.def.in @@ -235,6 +235,7 @@ EXPORTS urPrintContextSetExtendedDeleterParams urPrintDeviceAffinityDomainFlags urPrintDeviceBinary + urPrintDeviceCommandBufferUpdateCapabilityFlags urPrintDeviceCreateWithNativeHandleParams urPrintDeviceExecCapabilityFlags urPrintDeviceFpCapabilityFlags diff --git a/source/loader/loader.map.in b/source/loader/loader.map.in index 18e4018aee..a4ca4a713f 100644 --- a/source/loader/loader.map.in +++ b/source/loader/loader.map.in @@ -235,6 +235,7 @@ urPrintContextSetExtendedDeleterParams; urPrintDeviceAffinityDomainFlags; urPrintDeviceBinary; + urPrintDeviceCommandBufferUpdateCapabilityFlags; urPrintDeviceCreateWithNativeHandleParams; urPrintDeviceExecCapabilityFlags; urPrintDeviceFpCapabilityFlags; diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 20a5e8acfa..ddcb63cda1 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -7112,8 +7112,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. It's invalid to specify - ///< the default kernel `hKernel` as part of this list. + ///< command after the command-buffer is finalized. The default kernel + ///< `hKernel` is implicitly marked as an alternative. It's + ///< invalid to specify it as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 36e61ba09d..81937709ca 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7551,8 +7551,9 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. It's invalid to specify - ///< the default kernel `hKernel` as part of this list. + ///< command after the command-buffer is finalized. The default kernel + ///< `hKernel` is implicitly marked as an alternative. It's + ///< invalid to specify it as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * @@ -8313,8 +8314,9 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero. +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. /// + If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. 
-/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. /// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size. /// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index f9d510e95d..454dc6d436 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -980,6 +980,14 @@ urPrintExpImageCopyRegion(const struct ur_exp_image_copy_region_t params, return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintDeviceCommandBufferUpdateCapabilityFlags( + enum ur_device_command_buffer_update_capability_flag_t value, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpCommandBufferInfo(enum ur_exp_command_buffer_info_t value, char *buffer, const size_t buff_size, size_t *out_size) { diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 0babfaf8ae..f5f02bbee4 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -6407,8 +6407,9 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the 
kernel in this - ///< command after the command-buffer is finalized. It's invalid to specify - ///< the default kernel `hKernel` as part of this list. + ///< command after the command-buffer is finalized. The default kernel + ///< `hKernel` is implicitly marked as an alternative. It's + ///< invalid to specify it as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * @@ -7026,8 +7027,9 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero. +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. /// + If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. /// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size. 
/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match index 096a052315..a4b2789372 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match @@ -15,6 +15,7 @@ {{OPT}}InvalidUpdateTest.GlobalLocalSizeMistach/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}InvalidUpdateTest.ImplToUserDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}InvalidUpdateTest.UserToImplDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}InvalidUpdateTest.InvalidDimensions/SYCL_NATIVE_CPU___SYCL_Native_CPU__X_ {{OPT}}USMFillCommandTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMFillCommandTest.UpdateBeforeEnqueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMMultipleFillCommandTest.UpdateAllKernels/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} diff --git a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h index f81b664d7f..2cc91c4d3f 100644 --- a/test/conformance/exp_command_buffer/fixtures.h +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -32,14 +32,20 @@ static void checkCommandBufferSupport(ur_device_handle_t device) { } } -static void checkCommandBufferUpdateSupport(ur_device_handle_t device) { - bool updatable_command_buffer_support; +static void checkCommandBufferUpdateSupport( + ur_device_handle_t device, + ur_device_command_buffer_update_capability_flags_t requiredCapabilities) { + 
ur_device_command_buffer_update_capability_flags_t update_capability_flags; ASSERT_SUCCESS(urDeviceGetInfo( - device, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP, - sizeof(ur_bool_t), &updatable_command_buffer_support, nullptr)); + device, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP, + sizeof(update_capability_flags), &update_capability_flags, nullptr)); - if (!updatable_command_buffer_support) { + if (!update_capability_flags) { GTEST_SKIP() << "Updating EXP command-buffers is not supported."; + } else if ((update_capability_flags & requiredCapabilities) != + requiredCapabilities) { + GTEST_SKIP() << "Some of the command-buffer update capabilities " + "required are not supported by the device."; } } @@ -109,7 +115,14 @@ struct urUpdatableCommandBufferExpTest : uur::urQueueTest { UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::SetUp()); UUR_RETURN_ON_FATAL_FAILURE(checkCommandBufferSupport(device)); - UUR_RETURN_ON_FATAL_FAILURE(checkCommandBufferUpdateSupport(device)); + + auto requiredCapabilities = + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET; + UUR_RETURN_ON_FATAL_FAILURE( + checkCommandBufferUpdateSupport(device, requiredCapabilities)); // Create a command-buffer with update enabled. ur_exp_command_buffer_desc_t desc{ @@ -118,36 +131,16 @@ struct urUpdatableCommandBufferExpTest : uur::urQueueTest { ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc, &updatable_cmd_buf_handle)); ASSERT_NE(updatable_cmd_buf_handle, nullptr); - - // Currently there are synchronization issue with immediate submission when used for command buffers. - // So, create queue with batched submission for this test suite if the backend is Level Zero. 
- if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { - ur_queue_flags_t flags = UR_QUEUE_FLAG_SUBMISSION_BATCHED; - ur_queue_properties_t props = { - /*.stype =*/UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, - /*.pNext =*/nullptr, - /*.flags =*/flags, - }; - ASSERT_SUCCESS(urQueueCreate(context, device, &props, &queue)); - ASSERT_NE(queue, nullptr); - } else { - queue = urQueueTest::queue; - } } void TearDown() override { if (updatable_cmd_buf_handle) { EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle)); } - if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO && queue) { - ASSERT_SUCCESS(urQueueRelease(queue)); - } - UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::TearDown()); } ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr; - ur_queue_handle_t queue = nullptr; ur_platform_backend_t backend{}; }; @@ -155,30 +148,17 @@ struct urUpdatableCommandBufferExpExecutionTest : uur::urKernelExecutionTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp()); - ASSERT_NO_FATAL_FAILURE(checkCommandBufferSupport(device)); - ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, - &cmd_buf_handle)); - ASSERT_NE(cmd_buf_handle, nullptr); - } - - void TearDown() override { - if (cmd_buf_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); - } - UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::TearDown()); - } - - ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; - -}; - -struct urUpdatableCommandBufferExpExecutionTest - : uur::urKernelExecutionTest { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp()); + UUR_RETURN_ON_FATAL_FAILURE(checkCommandBufferSupport(device)); + auto requiredCapabilities = + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE | + 
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET; + UUR_RETURN_ON_FATAL_FAILURE( + checkCommandBufferUpdateSupport(device, requiredCapabilities)); - ASSERT_NO_FATAL_FAILURE(checkCommandBufferSupport(device)); - ASSERT_NO_FATAL_FAILURE(checkCommandBufferUpdateSupport(device)); + UUR_RETURN_ON_FATAL_FAILURE( + checkCommandBufferUpdateSupport(device, requiredCapabilities)); // Create a command-buffer with update enabled. ur_exp_command_buffer_desc_t desc{ @@ -193,10 +173,10 @@ struct urUpdatableCommandBufferExpExecutionTest if (updatable_cmd_buf_handle) { EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle)); } - UUR_RETURN_ON_FATAL_FAILURE( - urKernelExecutionTest::TearDown()); + UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::TearDown()); } + ur_platform_backend_t backend{}; ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr; }; diff --git a/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp index a12a7903a3..e694465fd2 100644 --- a/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp @@ -124,7 +124,7 @@ TEST_P(BufferFillCommandTest, UpdateParameters) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 1, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -177,7 +177,7 @@ TEST_P(BufferFillCommandTest, UpdateGlobalSize) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 1, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -228,7 +228,7 @@ TEST_P(BufferFillCommandTest, 
SeparateUpdateCalls) { ur_exp_command_buffer_update_kernel_launch_desc_t output_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 1, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -257,7 +257,7 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { ur_exp_command_buffer_update_kernel_launch_desc_t input_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -276,7 +276,7 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { ur_exp_command_buffer_update_kernel_launch_desc_t global_size_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -321,7 +321,7 @@ TEST_P(BufferFillCommandTest, OverrideUpdate) { ur_exp_command_buffer_update_kernel_launch_desc_t first_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -349,7 +349,7 @@ TEST_P(BufferFillCommandTest, OverrideUpdate) { ur_exp_command_buffer_update_kernel_launch_desc_t second_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -406,7 +406,7 @@ TEST_P(BufferFillCommandTest, OverrideArgList) { ur_exp_command_buffer_update_kernel_launch_desc_t second_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // 
numNewMemObjArgs 0, // numNewPointerArgs 2, // numNewValueArgs diff --git a/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp index d33ba3a563..19da365084 100644 --- a/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp @@ -184,7 +184,7 @@ TEST_P(BufferSaxpyKernelTest, UpdateParameters) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 2, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs diff --git a/test/conformance/exp_command_buffer/update/invalid_update.cpp b/test/conformance/exp_command_buffer/update/invalid_update.cpp index fd9a46c2aa..c5947e039f 100644 --- a/test/conformance/exp_command_buffer/update/invalid_update.cpp +++ b/test/conformance/exp_command_buffer/update/invalid_update.cpp @@ -90,7 +90,7 @@ TEST_P(InvalidUpdateTest, NotFinalizedCommandBuffer) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -141,7 +141,7 @@ TEST_P(InvalidUpdateTest, NotUpdatableCommandBuffer) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -178,7 +178,7 @@ TEST_P(InvalidUpdateTest, GlobalLocalSizeMistach) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext 
- kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -216,7 +216,7 @@ TEST_P(InvalidUpdateTest, ImplToUserDefinedLocalSize) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -249,7 +249,7 @@ TEST_P(InvalidUpdateTest, UserToImplDefinedLocalSize) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -267,3 +267,32 @@ TEST_P(InvalidUpdateTest, UserToImplDefinedLocalSize) { urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); } + +// If the kernel handle is not being updated, then it's invalid to change +// the number of dimensions. 
+TEST_P(InvalidUpdateTest, InvalidDimensions) { + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + finalized = true; + + size_t new_global_size = 64; + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, // hNewKernel + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + n_dimensions + 1, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + &new_global_size, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Change the work dimensions without changing the kernel handle + ur_result_t result = + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); + ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); +} diff --git a/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp b/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp index a533786917..9aa7b83817 100644 --- a/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp +++ b/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp @@ -73,7 +73,7 @@ struct TestSaxpyKernel : public TestKernel { ASSERT_NO_FATAL_FAILURE(buildKernel()); const size_t AllocationSize = sizeof(uint32_t) * GlobalSize; - for (auto &SharedPtr : Memory) { + for (auto &SharedPtr : Allocations) { ASSERT_SUCCESS(urUSMSharedAlloc(Context, Device, nullptr, nullptr, AllocationSize, &SharedPtr)); ASSERT_NE(SharedPtr, nullptr); @@ -84,28 +84,80 @@ struct TestSaxpyKernel : public TestKernel { } // Index 0 is the output - ASSERT_SUCCESS(urKernelSetArgPointer(Kernel, 0, nullptr, Memory[0])); + ASSERT_SUCCESS( + urKernelSetArgPointer(Kernel, 0, nullptr, Allocations[0])); // Index 1 is A ASSERT_SUCCESS(urKernelSetArgValue(Kernel, 1, sizeof(A), nullptr, &A)); // Index 2 is X - 
ASSERT_SUCCESS(urKernelSetArgPointer(Kernel, 2, nullptr, Memory[1])); + ASSERT_SUCCESS( + urKernelSetArgPointer(Kernel, 2, nullptr, Allocations[1])); // Index 3 is Y - ASSERT_SUCCESS(urKernelSetArgPointer(Kernel, 3, nullptr, Memory[2])); + ASSERT_SUCCESS( + urKernelSetArgPointer(Kernel, 3, nullptr, Allocations[2])); + + UpdatePointerDesc[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + &Allocations[0], // pArgValue + }; + + UpdatePointerDesc[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + nullptr, // pProperties + &Allocations[1], // pArgValue + }; + + UpdatePointerDesc[2] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 3, // argIndex + nullptr, // pProperties + &Allocations[2], // pArgValue + }; + + UpdateValDesc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(A), // argSize + nullptr, // pProperties + &A, // hArgValue + }; + + UpdateDesc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + Kernel, // hNewKernel + 0, // numNewMemObjArgs + 3, // numNewPointerArgs + 1, // numNewValueArgs + NDimensions, // newWorkDim + nullptr, // pNewMemObjArgList + UpdatePointerDesc.data(), // pNewPointerArgList + &UpdateValDesc, // pNewValueArgList + &GlobalOffset, // pNewGlobalWorkOffset + &GlobalSize, // pNewGlobalWorkSize + &LocalSize, // pNewLocalWorkSize + }; } void destroyKernel() override { - for (auto &shared_ptr : Memory) { - if (shared_ptr) { - EXPECT_SUCCESS(urUSMFree(Context, shared_ptr)); + for (auto &Allocation : Allocations) { + if (Allocation) { + EXPECT_SUCCESS(urUSMFree(Context, Allocation)); } } ASSERT_NO_FATAL_FAILURE(TestKernel::destroyKernel()); } void validate() override { - auto *output = static_cast<uint32_t *>(Memory[0]); - auto *X = 
static_cast<uint32_t *>(Memory[1]); - auto *Y = static_cast<uint32_t *>(Memory[2]); + auto *output = static_cast<uint32_t *>(Allocations[0]); + auto *X = static_cast<uint32_t *>(Allocations[1]); + auto *Y = static_cast<uint32_t *>(Allocations[2]); for (size_t i = 0; i < GlobalSize; i++) { uint32_t result = A * X[i] + Y[i]; @@ -113,13 +165,18 @@ struct TestSaxpyKernel : public TestKernel { } } - const size_t LocalSize = 4; - const size_t GlobalSize = 32; - const size_t GlobalOffset = 0; - const size_t NDimensions = 1; - const uint32_t A = 42; + std::array<ur_exp_command_buffer_update_pointer_arg_desc_t, 3> + UpdatePointerDesc; + ur_exp_command_buffer_update_value_arg_desc_t UpdateValDesc; + ur_exp_command_buffer_update_kernel_launch_desc_t UpdateDesc; + + size_t LocalSize = 4; + size_t GlobalSize = 32; + size_t GlobalOffset = 0; + uint32_t NDimensions = 1; + uint32_t A = 42; - std::array<void *, 3> Memory = {nullptr, nullptr, nullptr}; + std::array<void *, 3> Allocations = {nullptr, nullptr, nullptr}; }; struct TestFill2DKernel : public TestKernel { @@ -210,6 +267,11 @@ struct urCommandBufferKernelHandleUpdateTest UUR_RETURN_ON_FATAL_FAILURE(urUpdatableCommandBufferExpTest::SetUp()); + UUR_RETURN_ON_FATAL_FAILURE( + uur::command_buffer::checkCommandBufferUpdateSupport( + device, + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE)); + ur_device_usm_access_capability_flags_t shared_usm_flags; ASSERT_SUCCESS( uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); @@ -311,6 +373,42 @@ TEST_P(urCommandBufferKernelHandleUpdateTest, UpdateAgain) { ASSERT_NO_FATAL_FAILURE(FillUSM2DKernel->validate()); } +/* Test that it is possible to change the kernel handle in a command and later restore it to the original handle */ +TEST_P(urCommandBufferKernelHandleUpdateTest, RestoreOriginalKernel) { + + std::vector<ur_kernel_handle_t> KernelAlternatives = { + FillUSM2DKernel->Kernel}; + + uur::raii::CommandBufferCommand CommandHandle; + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, SaxpyKernel->Kernel, SaxpyKernel->NDimensions, + &(SaxpyKernel->GlobalOffset), 
&(SaxpyKernel->GlobalSize), + &(SaxpyKernel->LocalSize), KernelAlternatives.size(), + KernelAlternatives.data(), 0, nullptr, nullptr, CommandHandle.ptr())); + ASSERT_NE(CommandHandle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp( + CommandHandle, &FillUSM2DKernel->UpdateDesc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ASSERT_NO_FATAL_FAILURE(SaxpyKernel->validate()); + ASSERT_NO_FATAL_FAILURE(FillUSM2DKernel->validate()); + + // Updating A, so that the second launch of the saxpy kernel actually has a different output. + SaxpyKernel->A = 20; + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp( + CommandHandle, &SaxpyKernel->UpdateDesc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ASSERT_NO_FATAL_FAILURE(SaxpyKernel->validate()); +} + TEST_P(urCommandBufferKernelHandleUpdateTest, KernelAlternativeNotRegistered) { uur::raii::CommandBufferCommand CommandHandle; diff --git a/test/conformance/exp_command_buffer/update/ndrange_update.cpp b/test/conformance/exp_command_buffer/update/ndrange_update.cpp index 4c5ff6449a..dd3f17a90a 100644 --- a/test/conformance/exp_command_buffer/update/ndrange_update.cpp +++ b/test/conformance/exp_command_buffer/update/ndrange_update.cpp @@ -128,7 +128,7 @@ TEST_P(NDRangeUpdateTest, Update3D) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -173,7 +173,7 @@ TEST_P(NDRangeUpdateTest, Update2D) { 
ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -218,7 +218,7 @@ TEST_P(NDRangeUpdateTest, Update1D) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -252,7 +252,7 @@ TEST_P(NDRangeUpdateTest, Invalid) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs diff --git a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp index 31b14e9016..b437971e9a 100644 --- a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp @@ -120,7 +120,7 @@ TEST_P(USMFillCommandTest, UpdateParameters) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs @@ -174,7 +174,7 @@ TEST_P(USMFillCommandTest, UpdateBeforeEnqueue) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs @@ -326,7 +326,7 @@ 
TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs diff --git a/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp index 93fc683127..21f21afa11 100644 --- a/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp @@ -148,7 +148,7 @@ TEST_P(USMSaxpyKernelTest, UpdateParameters) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs @@ -254,7 +254,7 @@ TEST_P(USMMultiSaxpyKernelTest, UpdateParameters) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs @@ -320,7 +320,7 @@ TEST_P(USMMultiSaxpyKernelTest, UpdateWithoutBlocking) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs diff --git a/test/conformance/exp_enqueue_native/CMakeLists.txt b/test/conformance/exp_enqueue_native/CMakeLists.txt index 403d3caa3c..8638fa1349 100644 --- a/test/conformance/exp_enqueue_native/CMakeLists.txt +++ b/test/conformance/exp_enqueue_native/CMakeLists.txt @@ -9,8 +9,8 @@ if 
(UR_BUILD_ADAPTER_CUDA) enqueue_native_cuda.cpp ) target_include_directories(test-exp_enqueue_native PRIVATE - ${PROJECT_SOURCE_DIR}/source - ${PROJECT_SOURCE_DIR}/source/adapters/cuda + ${PROJECT_SOURCE_DIR}/source + ${PROJECT_SOURCE_DIR}/source/adapters/cuda ) target_link_libraries(test-exp_enqueue_native PRIVATE cudadrv) endif() diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp index 22f4ec6413..59f4a8e5b2 100644 --- a/tools/urinfo/urinfo.hpp +++ b/tools/urinfo/urinfo.hpp @@ -334,8 +334,8 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, printDeviceInfo(hDevice, UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP); std::cout << prefix; - printDeviceInfo( - hDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP); + printDeviceInfo( + hDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP); std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP); std::cout << prefix; From a7c2f1b9781aac8a6b5181b4df7e96a14a5c0457 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 9 Sep 2024 16:58:00 +0100 Subject: [PATCH 11/14] Make newWorkDim non-optional and remove newLocalWorkgroup nullptr errors --- include/ur_api.h | 8 +- scripts/core/exp-command-buffer.yml | 10 +- source/adapters/cuda/command_buffer.cpp | 54 +++---- source/adapters/hip/command_buffer.cpp | 77 +++++---- source/adapters/level_zero/command_buffer.cpp | 51 +++--- source/adapters/level_zero/device.cpp | 16 +- source/loader/layers/validation/ur_valddi.cpp | 10 ++ source/loader/ur_libapi.cpp | 8 +- source/ur_api.cpp | 8 +- .../update/buffer_fill_kernel_update.cpp | 58 +++---- .../update/buffer_saxpy_kernel_update.cpp | 4 +- .../update/invalid_update.cpp | 146 +++++++++--------- .../update/ndrange_update.cpp | 8 +- .../update/usm_fill_kernel_update.cpp | 28 ++-- .../update/usm_saxpy_kernel_update.cpp | 6 +- 15 files changed, 257 insertions(+), 235 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index c162434fdc..5f7405bb57 100644 
--- a/include/ur_api.h +++ b/include/ur_api.h @@ -8950,17 +8950,15 @@ urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. -/// + If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. -/// + If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size. +/// + `pUpdateKernelLaunch->pNewLocalWorkSize != NULL && pUpdateKernelLaunch->pNewGlobalWorkSize == NULL` +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is different from the work-dim currently associated with `hCommand`. 
/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// + `pUpdateKernelLaunch->newWorkDim < 0 || pUpdateKernelLaunch->newWorkDim > 3` /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of ::urCommandBufferAppendKernelLaunchExp when this command was created. diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 6cfd193479..ee394a6ecc 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -954,17 +954,15 @@ returns: - $X_RESULT_ERROR_INVALID_OPERATION: - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." - "If the command-buffer `hCommand` belongs to has not been finalized." - - "If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero." - - "If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`." - - "If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL." - - "If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size." 
- - "If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size." + - "`pUpdateKernelLaunch->pNewLocalWorkSize != NULL && pUpdateKernelLaunch->pNewGlobalWorkSize == NULL`" + - "If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is different from the work-dim currently associated with `hCommand`." - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE - $X_RESULT_ERROR_INVALID_ENUMERATION - - $X_RESULT_ERROR_INVALID_WORK_DIMENSION + - $X_RESULT_ERROR_INVALID_WORK_DIMENSION: + - "`pUpdateKernelLaunch->newWorkDim < 0 || pUpdateKernelLaunch->newWorkDim > 3`" - $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE - $X_RESULT_ERROR_INVALID_VALUE: - "If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of $xCommandBufferAppendKernelLaunchExp when this command was created." 
diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 1305bae515..d9899b8f30 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -887,37 +887,37 @@ validateCommandDesc(ur_exp_command_buffer_command_handle_t Command, return UR_RESULT_ERROR_INVALID_OPERATION; } - const uint32_t NewWorkDim = UpdateCommandDesc->newWorkDim; - if (!NewWorkDim && Command->Kernel != UpdateCommandDesc->hNewKernel) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - if (NewWorkDim) { - UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - - if (NewWorkDim != Command->WorkDim && - Command->Kernel == UpdateCommandDesc->hNewKernel) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } + // const uint32_t NewWorkDim = UpdateCommandDesc->newWorkDim; + // if (!NewWorkDim) { + // return UR_RESULT_ERROR_INVALID_OPERATION; + // } - // Error If Local size and not global size - if ((UpdateCommandDesc->pNewLocalWorkSize != nullptr) && - (UpdateCommandDesc->pNewGlobalWorkSize == nullptr)) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - // Error if local size non-nullptr and created with null - // or if local size nullptr and created with non-null - const bool IsNewLocalSizeNull = - UpdateCommandDesc->pNewLocalWorkSize == nullptr; - const bool IsOriginalLocalSizeNull = Command->isNullLocalSize(); + // if (NewWorkDim) { + // UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + // UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } + if (UpdateCommandDesc->newWorkDim != Command->WorkDim && + Command->Kernel == UpdateCommandDesc->hNewKernel) { + return UR_RESULT_ERROR_INVALID_OPERATION; } + // // Error If Local size and not global size + // if ((UpdateCommandDesc->pNewLocalWorkSize != nullptr) && 
+ // (UpdateCommandDesc->pNewGlobalWorkSize == nullptr)) { + // return UR_RESULT_ERROR_INVALID_OPERATION; + // } + + // // Error if local size non-nullptr and created with null + // // or if local size nullptr and created with non-null + // const bool IsNewLocalSizeNull = + // UpdateCommandDesc->pNewLocalWorkSize == nullptr; + // const bool IsOriginalLocalSizeNull = Command->isNullLocalSize(); + // + // if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { + // return UR_RESULT_ERROR_INVALID_OPERATION; + // } + // } + if (!Command->ValidKernelHandles.count(UpdateCommandDesc->hNewKernel)) { return UR_RESULT_ERROR_INVALID_VALUE; } diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index bc533c564d..86552881d1 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -18,6 +18,7 @@ #include "kernel.hpp" #include "memory.hpp" #include "queue.hpp" +#include #include @@ -48,9 +49,9 @@ commandHandleReleaseInternal(ur_exp_command_buffer_command_handle_t Command) { ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( ur_context_handle_t hContext, ur_device_handle_t hDevice, bool IsUpdatable) - : Context(hContext), Device(hDevice), IsUpdatable(IsUpdatable), - HIPGraph{nullptr}, HIPGraphExec{nullptr}, RefCountInternal{1}, - RefCountExternal{1}, NextSyncPoint{0} { + : Context(hContext), Device(hDevice), + IsUpdatable(IsUpdatable), HIPGraph{nullptr}, HIPGraphExec{nullptr}, + RefCountInternal{1}, RefCountExternal{1}, NextSyncPoint{0} { urContextRetain(hContext); urDeviceRetain(hDevice); } @@ -330,9 +331,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( UR_RESULT_ERROR_INVALID_KERNEL); UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + UR_ASSERT(!(pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + for (uint32_t i = 0; i < 
numKernelAlternatives; ++i) { + UR_ASSERT(phKernelAlternatives[i] != hKernel, + UR_RESULT_ERROR_INVALID_VALUE); + } + hipGraphNode_t GraphNode; std::vector DepsList; @@ -866,37 +873,41 @@ validateCommandDesc(ur_exp_command_buffer_command_handle_t Command, return UR_RESULT_ERROR_INVALID_OPERATION; } - const uint32_t NewWorkDim = UpdateCommandDesc->newWorkDim; - if (!NewWorkDim && Command->Kernel != UpdateCommandDesc->hNewKernel) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - if (NewWorkDim) { - UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + // const uint32_t NewWorkDim = UpdateCommandDesc->newWorkDim; + // if (!NewWorkDim && Command->Kernel != UpdateCommandDesc->hNewKernel) { + // return UR_RESULT_ERROR_INVALID_OPERATION; + // } - if (NewWorkDim != Command->WorkDim && - Command->Kernel == UpdateCommandDesc->hNewKernel) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - // Error If Local size and not global size - if ((UpdateCommandDesc->pNewLocalWorkSize != nullptr) && - (UpdateCommandDesc->pNewGlobalWorkSize == nullptr)) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } + // if (NewWorkDim) { + // UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + // UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - // Error if local size non-nullptr and created with null - // or if local size nullptr and created with non-null - const bool IsNewLocalSizeNull = - UpdateCommandDesc->pNewLocalWorkSize == nullptr; - const bool IsOriginalLocalSizeNull = Command->isNullLocalSize(); + std::cerr << "HERE" << std::endl; + std::cerr << UpdateCommandDesc->newWorkDim << std::endl; + std::cerr << Command->WorkDim << std::endl; - if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } + if (UpdateCommandDesc->newWorkDim != Command->WorkDim && + Command->Kernel == UpdateCommandDesc->hNewKernel) { + return 
UR_RESULT_ERROR_INVALID_OPERATION; } + // // Error If Local size and not global size + // if ((UpdateCommandDesc->pNewLocalWorkSize != nullptr) && + // (UpdateCommandDesc->pNewGlobalWorkSize == nullptr)) { + // return UR_RESULT_ERROR_INVALID_OPERATION; + // } + + // // Error if local size non-nullptr and created with null + // // or if local size nullptr and created with non-null + // const bool IsNewLocalSizeNull = + // UpdateCommandDesc->pNewLocalWorkSize == nullptr; + // const bool IsOriginalLocalSizeNull = Command->isNullLocalSize(); + // + // if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { + // return UR_RESULT_ERROR_INVALID_OPERATION; + // } + // } + if (!Command->ValidKernelHandles.count(UpdateCommandDesc->hNewKernel)) { return UR_RESULT_ERROR_INVALID_VALUE; } @@ -907,8 +918,8 @@ validateCommandDesc(ur_exp_command_buffer_command_handle_t Command, /** * Updates the arguments of CommandDesc->hNewKernel * @param[in] Device The device associated with the kernel being updated. - * @param[in] UpdateCommandDesc The update command description that contains the - * new kernel and its arguments. + * @param[in] UpdateCommandDesc The update command description that contains + * the new kernel and its arguments. * @return UR_RESULT_SUCCESS or an error code on failure */ ur_result_t @@ -1020,8 +1031,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( updateKernelArguments(CommandBuffer->Device, pUpdateKernelLaunch)); UR_CHECK_ERROR(updateCommand(hCommand, pUpdateKernelLaunch)); - // If no worksize is provided make sure we pass nullptr to setKernelParams so - // it can guess the local work size. + // If no worksize is provided make sure we pass nullptr to setKernelParams + // so it can guess the local work size. const bool ProvidedLocalSize = !hCommand->isNullLocalSize(); size_t *LocalWorkSize = ProvidedLocalSize ? 
hCommand->LocalWorkSize : nullptr; diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 16876976ca..9eee0d07e9 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -1320,35 +1320,42 @@ ur_result_t validateCommandDesc( ->mutableCommandFlags; logger::debug("Mutable features supported by device {}", SupportedFeatures); - uint32_t Dim = CommandDesc->newWorkDim; - if (Dim != 0) { - // Error if work dim changes - if (Dim != Command->WorkDim) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - // Error If Local size and not global size - if ((CommandDesc->pNewLocalWorkSize != nullptr) && - (CommandDesc->pNewGlobalWorkSize == nullptr)) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - // Error if local size non-nullptr and created with null - // or if local size nullptr and created with non-null - const bool IsNewLocalSizeNull = CommandDesc->pNewLocalWorkSize == nullptr; - const bool IsOriginalLocalSizeNull = !Command->UserDefinedLocalSize; + // kernel handle updates are not yet supported. 
+ if (CommandDesc->hNewKernel != Command->Kernel) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } - if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } + // uint32_t Dim = CommandDesc->newWorkDim; + // if (Dim != 0) { + // Error if work dim changes + if (CommandDesc->hNewKernel == Command->Kernel && + CommandDesc->newWorkDim != Command->WorkDim) { + return UR_RESULT_ERROR_INVALID_OPERATION; } + // // Error If Local size and not global size + // if ((CommandDesc->pNewLocalWorkSize != nullptr) && + // (CommandDesc->pNewGlobalWorkSize == nullptr)) { + // return UR_RESULT_ERROR_INVALID_OPERATION; + // } + + // // Error if local size non-nullptr and created with null + // // or if local size nullptr and created with non-null + // const bool IsNewLocalSizeNull = CommandDesc->pNewLocalWorkSize == + // nullptr; const bool IsOriginalLocalSizeNull = + // !Command->UserDefinedLocalSize; + // + // if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { + // return UR_RESULT_ERROR_INVALID_OPERATION; + // } + // } + // Check if new global offset is provided. size_t *NewGlobalWorkOffset = CommandDesc->pNewGlobalWorkOffset; UR_ASSERT(!NewGlobalWorkOffset || (SupportedFeatures & ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET), UR_RESULT_ERROR_UNSUPPORTED_FEATURE); - if (NewGlobalWorkOffset && Dim > 0) { + if (NewGlobalWorkOffset) { if (!CommandBuffer->Context->getPlatform() ->ZeDriverGlobalOffsetExtensionFound) { logger::error("No global offset extension found on this driver"); @@ -1618,8 +1625,6 @@ ur_result_t urCommandBufferUpdateKernelLaunchExp( ur_exp_command_buffer_command_handle_t Command, const ur_exp_command_buffer_update_kernel_launch_desc_t *CommandDesc) { UR_ASSERT(Command->Kernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(CommandDesc->newWorkDim <= 3, - UR_RESULT_ERROR_INVALID_WORK_DIMENSION); // Lock command, kernel and command buffer for update. 
std::scoped_lock Guard( diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 507695ec91..463054c735 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -1013,21 +1013,25 @@ ur_result_t urDeviceGetInfo( case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { const bool ZeMutableCommandFlags = Device->ZeDeviceMutableCmdListsProperties->mutableCommandFlags; + auto supportsFlags = [&](ze_mutable_command_exp_flags_t RequiredFlags) { + if ((ZeMutableCommandFlags & RequiredFlags) == RequiredFlags) { + return true; + } + return false; + }; ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = 0; - if (ZeMutableCommandFlags & ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS) { + if (supportsFlags(ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS)) { UpdateCapabilities |= UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS; } - ze_mutable_command_exp_flags_t ReqUpdateWG = - ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT | - ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE; - if ((ZeMutableCommandFlags & ReqUpdateWG) == ReqUpdateWG) { + if (supportsFlags(ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT | + ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE)) { UpdateCapabilities |= UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE | UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE; } - if (ZeMutableCommandFlags & ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET) { + if (supportsFlags(ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET)) { UpdateCapabilities |= UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET; } diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 542dfc3be5..dbca114c58 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8955,6 +8955,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( if (NULL == 
pUpdateKernelLaunch) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + + if (pUpdateKernelLaunch->pNewLocalWorkSize != NULL && + pUpdateKernelLaunch->pNewGlobalWorkSize == NULL) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + if (pUpdateKernelLaunch->newWorkDim < 0 || + pUpdateKernelLaunch->newWorkDim > 3) { + return UR_RESULT_ERROR_INVALID_WORK_DIMENSION; + } } ur_result_t result = diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 81937709ca..f3f6b27bba 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -8314,17 +8314,15 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. -/// + If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. -/// + If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size. 
-/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size. +/// + `pUpdateKernelLaunch->pNewLocalWorkSize != NULL && pUpdateKernelLaunch->pNewGlobalWorkSize == NULL` +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is different from the work-dim currently associated with `hCommand`. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// + `pUpdateKernelLaunch->newWorkDim < 0 || pUpdateKernelLaunch->newWorkDim > 3` /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of ::urCommandBufferAppendKernelLaunchExp when this command was created. diff --git a/source/ur_api.cpp b/source/ur_api.cpp index f5f02bbee4..8c005ecb5d 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -7027,17 +7027,15 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. -/// + If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero. 
-/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. -/// + If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size. +/// + `pUpdateKernelLaunch->pNewLocalWorkSize != NULL && pUpdateKernelLaunch->pNewGlobalWorkSize == NULL` +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is different from the work-dim currently associated with `hCommand`. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// + `pUpdateKernelLaunch->newWorkDim < 0 || pUpdateKernelLaunch->newWorkDim > 3` /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of ::urCommandBufferAppendKernelLaunchExp when this command was created. 
diff --git a/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp index e694465fd2..08be337466 100644 --- a/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp @@ -73,7 +73,7 @@ struct BufferFillCommandTest static constexpr size_t local_size = 4; static constexpr size_t global_size = 32; static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; + static constexpr uint32_t n_dimensions = 1; static constexpr size_t buffer_size = sizeof(val) * global_size; ur_mem_handle_t buffer = nullptr; ur_mem_handle_t new_buffer = nullptr; @@ -128,7 +128,7 @@ TEST_P(BufferFillCommandTest, UpdateParameters) { 1, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim &new_output_desc, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_input_desc, // pNewValueArgList @@ -181,7 +181,7 @@ TEST_P(BufferFillCommandTest, UpdateGlobalSize) { 1, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 1, // newWorkDim + n_dimensions, // newWorkDim &new_output_desc, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList @@ -232,7 +232,7 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { 1, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim &new_output_desc, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList @@ -261,7 +261,7 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_input_desc, // pNewValueArgList @@ -276,17 +276,17 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { 
ur_exp_command_buffer_update_kernel_launch_desc_t global_size_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, // hNewKernel - 0, // numNewMemObjArgs - 0, // numNewPointerArgs - 0, // numNewValueArgs - static_cast(n_dimensions), // newWorkDim - nullptr, // pNewMemObjArgList - nullptr, // pNewPointerArgList - nullptr, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - &new_global_size, // pNewGlobalWorkSize - &new_local_size, // pNewLocalWorkSize + kernel, // hNewKernel + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + &new_global_size, // pNewGlobalWorkSize + &new_local_size, // pNewLocalWorkSize }; ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp( @@ -325,7 +325,7 @@ TEST_P(BufferFillCommandTest, OverrideUpdate) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &first_input_desc, // pNewValueArgList @@ -353,7 +353,7 @@ TEST_P(BufferFillCommandTest, OverrideUpdate) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &second_input_desc, // pNewValueArgList @@ -406,17 +406,17 @@ TEST_P(BufferFillCommandTest, OverrideArgList) { ur_exp_command_buffer_update_kernel_launch_desc_t second_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, // hNewKernel - 0, // numNewMemObjArgs - 0, // numNewPointerArgs - 2, // numNewValueArgs - 0, // newWorkDim - nullptr, // pNewMemObjArgList - nullptr, // pNewPointerArgList - input_descs, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - nullptr, // 
pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize + kernel, // hNewKernel + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 2, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + input_descs, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize }; ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, diff --git a/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp index 19da365084..69ba67eb0f 100644 --- a/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp @@ -130,7 +130,7 @@ struct BufferSaxpyKernelTest static constexpr size_t local_size = 4; static constexpr size_t global_size = 32; static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; + static constexpr uint32_t n_dimensions = 1; static constexpr uint32_t A = 42; std::array buffers = {nullptr, nullptr, nullptr, nullptr}; @@ -188,7 +188,7 @@ TEST_P(BufferSaxpyKernelTest, UpdateParameters) { 2, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim new_input_descs, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_A_desc, // pNewValueArgList diff --git a/test/conformance/exp_command_buffer/update/invalid_update.cpp b/test/conformance/exp_command_buffer/update/invalid_update.cpp index c5947e039f..2ebd328630 100644 --- a/test/conformance/exp_command_buffer/update/invalid_update.cpp +++ b/test/conformance/exp_command_buffer/update/invalid_update.cpp @@ -65,7 +65,7 @@ struct InvalidUpdateTest static constexpr size_t local_size = 4; static constexpr size_t global_size = 32; static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; + static constexpr 
uint32_t n_dimensions = 1; static constexpr size_t allocation_size = sizeof(val) * global_size; void *shared_ptr = nullptr; ur_exp_command_buffer_command_handle_t command_handle = nullptr; @@ -94,7 +94,7 @@ TEST_P(InvalidUpdateTest, NotFinalizedCommandBuffer) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_input_desc, // pNewValueArgList @@ -145,7 +145,7 @@ TEST_P(InvalidUpdateTest, NotUpdatableCommandBuffer) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_input_desc, // pNewValueArgList @@ -197,76 +197,76 @@ TEST_P(InvalidUpdateTest, GlobalLocalSizeMistach) { ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); } -// Test setting `pNewLocalWorkSize` to a non-NULL value when the command was -// created with a NULL local work size gives the correct error. 
-TEST_P(InvalidUpdateTest, ImplToUserDefinedLocalSize) { - // Append kernel command to command-buffer using NULL local work size - ur_exp_command_buffer_command_handle_t second_command_handle = nullptr; - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, nullptr, 0, nullptr, 0, nullptr, nullptr, - &second_command_handle)); - ASSERT_NE(second_command_handle, nullptr); - - EXPECT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); - finalized = true; - - size_t new_global_size = 64; - size_t new_local_size = 16; - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - kernel, // hNewKernel - 0, // numNewMemObjArgs - 0, // numNewPointerArgs - 0, // numNewValueArgs - n_dimensions, // newWorkDim - nullptr, // pNewMemObjArgList - nullptr, // pNewPointerArgList - nullptr, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - &new_global_size, // pNewGlobalWorkSize - &new_local_size, // pNewLocalWorkSize - }; - - // Update command local size to non-NULL when created with NULL value - ur_result_t result = urCommandBufferUpdateKernelLaunchExp( - second_command_handle, &update_desc); - EXPECT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); - - if (second_command_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(second_command_handle)); - } -} - -// Test setting `pNewLocalWorkSize` to a NULL value when the command was -// created with a non-NULL local work size gives the correct error. 
-TEST_P(InvalidUpdateTest, UserToImplDefinedLocalSize) { - ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); - finalized = true; - - size_t new_global_size = 64; - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - kernel, // hNewKernel - 0, // numNewMemObjArgs - 0, // numNewPointerArgs - 0, // numNewValueArgs - n_dimensions, // newWorkDim - nullptr, // pNewMemObjArgList - nullptr, // pNewPointerArgList - nullptr, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - &new_global_size, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize - }; - - // Update command local size to NULL when created with non-NULL value - ur_result_t result = - urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); - ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); -} +//// Test setting `pNewLocalWorkSize` to a non-NULL value when the command was +//// created with a NULL local work size gives the correct error. 
+//TEST_P(InvalidUpdateTest, ImplToUserDefinedLocalSize) { +// // Append kernel command to command-buffer using NULL local work size +// ur_exp_command_buffer_command_handle_t second_command_handle = nullptr; +// ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( +// updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, +// &global_size, nullptr, 0, nullptr, 0, nullptr, nullptr, +// &second_command_handle)); +// ASSERT_NE(second_command_handle, nullptr); +// +// EXPECT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); +// finalized = true; +// +// size_t new_global_size = 64; +// size_t new_local_size = 16; +// ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { +// UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype +// nullptr, // pNext +// kernel, // hNewKernel +// 0, // numNewMemObjArgs +// 0, // numNewPointerArgs +// 0, // numNewValueArgs +// n_dimensions, // newWorkDim +// nullptr, // pNewMemObjArgList +// nullptr, // pNewPointerArgList +// nullptr, // pNewValueArgList +// nullptr, // pNewGlobalWorkOffset +// &new_global_size, // pNewGlobalWorkSize +// &new_local_size, // pNewLocalWorkSize +// }; +// +// // Update command local size to non-NULL when created with NULL value +// ur_result_t result = urCommandBufferUpdateKernelLaunchExp( +// second_command_handle, &update_desc); +// EXPECT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); +// +// if (second_command_handle) { +// EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(second_command_handle)); +// } +//} + +//// Test setting `pNewLocalWorkSize` to a NULL value when the command was +//// created with a non-NULL local work size gives the correct error. 
+//TEST_P(InvalidUpdateTest, UserToImplDefinedLocalSize) { +// ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); +// finalized = true; +// +// size_t new_global_size = 64; +// ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { +// UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype +// nullptr, // pNext +// kernel, // hNewKernel +// 0, // numNewMemObjArgs +// 0, // numNewPointerArgs +// 0, // numNewValueArgs +// n_dimensions, // newWorkDim +// nullptr, // pNewMemObjArgList +// nullptr, // pNewPointerArgList +// nullptr, // pNewValueArgList +// nullptr, // pNewGlobalWorkOffset +// &new_global_size, // pNewGlobalWorkSize +// nullptr, // pNewLocalWorkSize +// }; +// +// // Update command local size to NULL when created with non-NULL value +// ur_result_t result = +// urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); +// ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); +//} // If the kernel handle is not being updated, then it's invalid to change // the number of dimensions. 
diff --git a/test/conformance/exp_command_buffer/update/ndrange_update.cpp b/test/conformance/exp_command_buffer/update/ndrange_update.cpp index dd3f17a90a..946be64165 100644 --- a/test/conformance/exp_command_buffer/update/ndrange_update.cpp +++ b/test/conformance/exp_command_buffer/update/ndrange_update.cpp @@ -99,7 +99,7 @@ struct NDRangeUpdateTest } static constexpr size_t elements_per_id = 6; - static constexpr size_t n_dimensions = 3; + static constexpr uint32_t n_dimensions = 3; static constexpr std::array global_size = {8, 8, 8}; static constexpr std::array local_size = {1, 2, 2}; static constexpr std::array global_offset = {0, 4, 4}; @@ -132,7 +132,7 @@ TEST_P(NDRangeUpdateTest, Update3D) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 3, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList @@ -177,7 +177,7 @@ TEST_P(NDRangeUpdateTest, Update2D) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 3, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList @@ -222,7 +222,7 @@ TEST_P(NDRangeUpdateTest, Update1D) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 3, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList diff --git a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp index b437971e9a..ad631ff6f2 100644 --- a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp @@ -71,7 +71,7 @@ struct USMFillCommandTest static constexpr size_t local_size = 4; static constexpr size_t global_size = 32; static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions 
= 1; + static constexpr uint32_t n_dimensions = 1; static constexpr size_t allocation_size = sizeof(val) * global_size; void *shared_ptr = nullptr; void *new_shared_ptr = nullptr; @@ -120,17 +120,17 @@ TEST_P(USMFillCommandTest, UpdateParameters) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, // hNewKernel - 0, // numNewMemObjArgs - 1, // numNewPointerArgs - 1, // numNewValueArgs - static_cast(n_dimensions), // newWorkDim - nullptr, // pNewMemObjArgList - &new_output_desc, // pNewPointerArgList - &new_input_desc, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - &new_global_size, // pNewGlobalWorkSize - &new_local_size, // pNewLocalWorkSize + kernel, // hNewKernel + 0, // numNewMemObjArgs + 1, // numNewPointerArgs + 1, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + &new_output_desc, // pNewPointerArgList + &new_input_desc, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + &new_global_size, // pNewGlobalWorkSize + &new_local_size, // pNewLocalWorkSize }; // Update kernel and enqueue command-buffer again @@ -178,7 +178,7 @@ TEST_P(USMFillCommandTest, UpdateBeforeEnqueue) { 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList &new_output_desc, // pNewPointerArgList &new_input_desc, // pNewValueArgList @@ -330,7 +330,7 @@ TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList &new_output_desc, // pNewPointerArgList &new_input_desc, // pNewValueArgList diff --git a/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp index 21f21afa11..1735efdd74 100644 --- 
a/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp @@ -152,7 +152,7 @@ TEST_P(USMSaxpyKernelTest, UpdateParameters) { 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList new_input_descs, // pNewPointerArgList &new_A_desc, // pNewValueArgList @@ -258,7 +258,7 @@ TEST_P(USMMultiSaxpyKernelTest, UpdateParameters) { 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList new_input_descs, // pNewPointerArgList &new_A_desc, // pNewValueArgList @@ -324,7 +324,7 @@ TEST_P(USMMultiSaxpyKernelTest, UpdateWithoutBlocking) { 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList new_input_descs, // pNewPointerArgList &new_A_desc, // pNewValueArgList From 4a646337556159d0da4888f0f07c41b512cd29ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Tue, 10 Sep 2024 16:04:22 +0100 Subject: [PATCH 12/14] Fix rebase mistake --- test/conformance/exp_command_buffer/fixtures.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h index 2cc91c4d3f..6852ce0ff7 100644 --- a/test/conformance/exp_command_buffer/fixtures.h +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -148,6 +148,9 @@ struct urUpdatableCommandBufferExpExecutionTest : uur::urKernelExecutionTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp()); + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + UUR_RETURN_ON_FATAL_FAILURE(checkCommandBufferSupport(device)); auto requiredCapabilities = 
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS | From 9ff534c05c91d66f081c5be3cbcbcea6569cdc0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Tue, 10 Sep 2024 16:21:36 +0100 Subject: [PATCH 13/14] Add testing for new supported behaviour --- include/ur_api.h | 10 +- scripts/core/exp-command-buffer.yml | 4 +- source/adapters/mock/ur_mockddi.cpp | 4 +- source/loader/layers/tracing/ur_trcddi.cpp | 4 +- source/loader/layers/validation/ur_valddi.cpp | 4 +- source/loader/ur_ldrddi.cpp | 4 +- source/loader/ur_libapi.cpp | 4 +- source/ur_api.cpp | 4 +- .../exp_command_buffer_adapter_hip.match | 4 - ...xp_command_buffer_adapter_native_cpu.match | 5 +- .../update/invalid_update.cpp | 72 ------- .../update/ndrange_update.cpp | 196 ++++++++++++++---- 12 files changed, 180 insertions(+), 135 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 5f7405bb57..4ff1f0b49d 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -8286,10 +8286,8 @@ typedef struct ur_exp_command_buffer_update_kernel_launch_desc_t { ///< values that describe the number of global work-items. size_t *pNewLocalWorkSize; ///< [in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned ///< values that describe the number of work-items that make up a - ///< work-group. If newWorkDim is non-zero and pNewLocalWorkSize is - ///< nullptr, then runtime implementation will choose the work-group size. - ///< If newWorkDim is zero and pNewLocalWorkSize is nullptr, then the local - ///< work size is unchanged. + ///< work-group. If pNewLocalWorkSize is nullptr, then the local work size + ///< is unchanged. } ur_exp_command_buffer_update_kernel_launch_desc_t; @@ -8427,7 +8425,9 @@ urCommandBufferAppendKernelLaunchExp( uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t *pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. 
- const size_t *pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + const size_t *pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. If this + ///< parameter is nullptr, then a local work size will be generated by the + ///< implementation. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in ///< phKernelAlternatives. ur_kernel_handle_t *phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index ee394a6ecc..5fefd3ce09 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -226,7 +226,7 @@ members: desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the number of global work-items." - type: "size_t*" name: pNewLocalWorkSize - desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the number of work-items that make up a work-group. If newWorkDim is non-zero and pNewLocalWorkSize is nullptr, then runtime implementation will choose the work-group size. If newWorkDim is zero and pNewLocalWorkSize is nullptr, then the local work size is unchanged." + desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the number of work-items that make up a work-group. If pNewLocalWorkSize is nullptr, then the local work size is unchanged." --- #-------------------------------------------------------------------------- type: typedef desc: "A value that identifies a command inside of a command-buffer, used for defining dependencies between commands in the same command-buffer." @@ -333,7 +333,7 @@ params: desc: "[in] Global work size to use when executing kernel." - type: "const size_t*" name: pLocalWorkSize - desc: "[in][optional] Local work size to use when executing kernel." 
+ desc: "[in][optional] Local work size to use when executing kernel. If this parameter is nullptr, then a local work size will be generated by the implementation." - type: uint32_t name: "numKernelAlternatives" desc: "[in] The number of kernel alternatives provided in phKernelAlternatives." diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index 714bf7817c..f298aae04c 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -8349,7 +8349,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * - pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. If this + ///< parameter is nullptr, then a local work size will be generated by the + ///< implementation. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in ///< phKernelAlternatives. diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index a3f48fd533..8c8d9dcca4 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -6493,7 +6493,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * - pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. If this + ///< parameter is nullptr, then a local work size will be generated by the + ///< implementation. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in ///< phKernelAlternatives. 
diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index dbca114c58..446e3fc86b 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8054,7 +8054,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * - pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. If this + ///< parameter is nullptr, then a local work size will be generated by the + ///< implementation. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in ///< phKernelAlternatives. diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index ddcb63cda1..51e75111dd 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -7105,7 +7105,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * - pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. If this + ///< parameter is nullptr, then a local work size will be generated by the + ///< implementation. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in ///< phKernelAlternatives. diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index f3f6b27bba..5638a0b46b 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7544,7 +7544,9 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. 
const size_t * - pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. If this + ///< parameter is nullptr, then a local work size will be generated by the + ///< implementation. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in ///< phKernelAlternatives. diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 8c005ecb5d..dae7e2950d 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -6400,7 +6400,9 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * - pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. If this + ///< parameter is nullptr, then a local work size will be generated by the + ///< implementation. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in ///< phKernelAlternatives. 
diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match index a39a452d04..e69de29bb2 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match @@ -1,4 +0,0 @@ -urCommandBufferKernelHandleUpdateTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urCommandBufferKernelHandleUpdateTest.UpdateAgain/AMD_HIP_BACKEND___{{.*}}_ -urCommandBufferKernelHandleUpdateTest.KernelAlternativeNotRegistered/AMD_HIP_BACKEND___{{.*}}_ -urCommandBufferKernelHandleUpdateTest.RegisterInvalidKernelAlternative/AMD_HIP_BACKEND___{{.*}}_ diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match index a4b2789372..765a5d44c9 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match @@ -13,8 +13,6 @@ {{OPT}}InvalidUpdateTest.NotFinalizedCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}InvalidUpdateTest.NotUpdatableCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}InvalidUpdateTest.GlobalLocalSizeMistach/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}InvalidUpdateTest.ImplToUserDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}InvalidUpdateTest.UserToImplDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}InvalidUpdateTest.InvalidDimensions/SYCL_NATIVE_CPU___SYCL_Native_CPU__X_ {{OPT}}USMFillCommandTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMFillCommandTest.UpdateBeforeEnqueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} @@ -23,7 +21,8 @@ {{OPT}}NDRangeUpdateTest.Update3D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
{{OPT}}NDRangeUpdateTest.Update2D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}NDRangeUpdateTest.Update1D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}NDRangeUpdateTest.Invalid/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}NDRangeUpdateTest.ImplToUserDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}NDRangeUpdateTest.UserToImplDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMMultiSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMMultiSaxpyKernelTest.UpdateWithoutBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} diff --git a/test/conformance/exp_command_buffer/update/invalid_update.cpp b/test/conformance/exp_command_buffer/update/invalid_update.cpp index 2ebd328630..6c9c2b3dce 100644 --- a/test/conformance/exp_command_buffer/update/invalid_update.cpp +++ b/test/conformance/exp_command_buffer/update/invalid_update.cpp @@ -197,77 +197,6 @@ TEST_P(InvalidUpdateTest, GlobalLocalSizeMistach) { ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); } -//// Test setting `pNewLocalWorkSize` to a non-NULL value when the command was -//// created with a NULL local work size gives the correct error. 
-//TEST_P(InvalidUpdateTest, ImplToUserDefinedLocalSize) { -// // Append kernel command to command-buffer using NULL local work size -// ur_exp_command_buffer_command_handle_t second_command_handle = nullptr; -// ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( -// updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, -// &global_size, nullptr, 0, nullptr, 0, nullptr, nullptr, -// &second_command_handle)); -// ASSERT_NE(second_command_handle, nullptr); -// -// EXPECT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); -// finalized = true; -// -// size_t new_global_size = 64; -// size_t new_local_size = 16; -// ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { -// UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype -// nullptr, // pNext -// kernel, // hNewKernel -// 0, // numNewMemObjArgs -// 0, // numNewPointerArgs -// 0, // numNewValueArgs -// n_dimensions, // newWorkDim -// nullptr, // pNewMemObjArgList -// nullptr, // pNewPointerArgList -// nullptr, // pNewValueArgList -// nullptr, // pNewGlobalWorkOffset -// &new_global_size, // pNewGlobalWorkSize -// &new_local_size, // pNewLocalWorkSize -// }; -// -// // Update command local size to non-NULL when created with NULL value -// ur_result_t result = urCommandBufferUpdateKernelLaunchExp( -// second_command_handle, &update_desc); -// EXPECT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); -// -// if (second_command_handle) { -// EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(second_command_handle)); -// } -//} - -//// Test setting `pNewLocalWorkSize` to a NULL value when the command was -//// created with a non-NULL local work size gives the correct error. 
-//TEST_P(InvalidUpdateTest, UserToImplDefinedLocalSize) { -// ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); -// finalized = true; -// -// size_t new_global_size = 64; -// ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { -// UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype -// nullptr, // pNext -// kernel, // hNewKernel -// 0, // numNewMemObjArgs -// 0, // numNewPointerArgs -// 0, // numNewValueArgs -// n_dimensions, // newWorkDim -// nullptr, // pNewMemObjArgList -// nullptr, // pNewPointerArgList -// nullptr, // pNewValueArgList -// nullptr, // pNewGlobalWorkOffset -// &new_global_size, // pNewGlobalWorkSize -// nullptr, // pNewLocalWorkSize -// }; -// -// // Update command local size to NULL when created with non-NULL value -// ur_result_t result = -// urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); -// ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); -//} - // If the kernel handle is not being updated, then it's invalid to change // the number of dimensions. 
TEST_P(InvalidUpdateTest, InvalidDimensions) { @@ -291,7 +220,6 @@ TEST_P(InvalidUpdateTest, InvalidDimensions) { nullptr, // pNewLocalWorkSize }; - // Update command local size to NULL when created with non-NULL value ur_result_t result = urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); diff --git a/test/conformance/exp_command_buffer/update/ndrange_update.cpp b/test/conformance/exp_command_buffer/update/ndrange_update.cpp index 946be64165..e2da3b4bf1 100644 --- a/test/conformance/exp_command_buffer/update/ndrange_update.cpp +++ b/test/conformance/exp_command_buffer/update/ndrange_update.cpp @@ -29,30 +29,34 @@ struct NDRangeUpdateTest std::memset(shared_ptr, 0, allocation_size); ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, shared_ptr)); - - // Add a 3 dimension kernel command to command-buffer and close - // command-buffer - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - updatable_cmd_buf_handle, kernel, n_dimensions, - global_offset.data(), global_size.data(), local_size.data(), 0, - nullptr, 0, nullptr, nullptr, &command_handle)); - ASSERT_NE(command_handle, nullptr); - - ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + // + // // Add a 3 dimension kernel command to command-buffer and close + // // command-buffer + // ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + // updatable_cmd_buf_handle, kernel, n_dimensions, + // global_offset.data(), global_size.data(), local_size.data(), 0, + // nullptr, 0, nullptr, nullptr, &command_handle)); + // ASSERT_NE(command_handle, nullptr); + // + // ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); } // For each work-item the kernel prints the global id and local id in each // of the 3 dimensions to an offset in the output based on global linear // id. 
void Validate(std::array global_size, - std::array local_size, + std::optional> local_size, std::array global_offset) { // DPC++ swaps the X & Z dimension for 3 Dimensional kernels // between those set by user and SPIR-V builtins. // See `ReverseRangeDimensionsForKernel()` in commands.cpp std::swap(global_size[0], global_size[2]); - std::swap(local_size[0], local_size[2]); + + if (local_size.has_value()) { + std::swap(local_size.value()[0], local_size.value()[2]); + } + std::swap(global_offset[0], global_offset[2]); // Verify global ID and local ID of each work item @@ -73,13 +77,15 @@ struct NDRangeUpdateTest EXPECT_EQ(global_id_y, y + global_offset[1]); EXPECT_EQ(global_id_z, z + global_offset[2]); - const int local_id_x = wi_ptr[3]; - const int local_id_y = wi_ptr[4]; - const int local_id_z = wi_ptr[5]; + if (local_size.has_value()) { + const int local_id_x = wi_ptr[3]; + const int local_id_y = wi_ptr[4]; + const int local_id_z = wi_ptr[5]; - EXPECT_EQ(local_id_x, x % local_size[0]); - EXPECT_EQ(local_id_y, y % local_size[1]); - EXPECT_EQ(local_id_z, z % local_size[2]); + EXPECT_EQ(local_id_x, x % local_size.value()[0]); + EXPECT_EQ(local_id_y, y % local_size.value()[1]); + EXPECT_EQ(local_id_z, z % local_size.value()[2]); + } } } } @@ -112,10 +118,17 @@ struct NDRangeUpdateTest UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(NDRangeUpdateTest); -// Keep the kernel work dimensions as 3, and update local size and global -// offset. 
+// Add a 3 dimension kernel command to the command-buffer and update the +// local size and global offset TEST_P(NDRangeUpdateTest, Update3D) { - // Run command-buffer prior to update an verify output + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, global_offset.data(), + global_size.data(), local_size.data(), 0, nullptr, 0, nullptr, nullptr, + &command_handle)); + ASSERT_NE(command_handle, nullptr); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + + // Run command-buffer prior to update and verify output ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, nullptr, nullptr)); ASSERT_SUCCESS(urQueueFinish(queue)); @@ -152,9 +165,17 @@ TEST_P(NDRangeUpdateTest, Update3D) { Validate(new_global_size, new_local_size, new_global_offset); } -// Update the kernel work dimensions to use 1 in the Z dimension, -// and update global size, local size, and global offset to new values. +// Add a 3 dimension kernel command to the command-buffer. Update the kernel +// work dimensions to be 1 in the Z dimension, and update global size, local +// size, and global offset to new values. 
TEST_P(NDRangeUpdateTest, Update2D) { + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, global_offset.data(), + global_size.data(), local_size.data(), 0, nullptr, 0, nullptr, nullptr, + &command_handle)); + ASSERT_NE(command_handle, nullptr); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + // Run command-buffer prior to update an verify output ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, nullptr, nullptr)); @@ -201,10 +222,18 @@ TEST_P(NDRangeUpdateTest, Update2D) { Validate(new_global_size, new_local_size, new_global_offset); } -// Update the kernel work dimensions to be 1 in Y & Z dimensions, and check -// that the previously set global size, local size, and global offset update +// Add a 3 dimension kernel command to the command-buffer. Update the kernel +// work dimensions to be 1 in the Y & Z dimensions, and check that the +// previously set global size, local size, and global offset update // accordingly. TEST_P(NDRangeUpdateTest, Update1D) { + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, global_offset.data(), + global_size.data(), local_size.data(), 0, nullptr, 0, nullptr, nullptr, + &command_handle)); + ASSERT_NE(command_handle, nullptr); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + // Run command-buffer prior to update an verify output ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, nullptr, nullptr)); @@ -246,27 +275,108 @@ TEST_P(NDRangeUpdateTest, Update1D) { Validate(new_global_size, new_local_size, new_global_offset); } -// Test error code is returned if work dimension parameter changes -TEST_P(NDRangeUpdateTest, Invalid) { - const size_t new_work_dim = n_dimensions - 1; +// Test that setting `pNewLocalWorkSize` to a non-NULL value when the command +// was created with a NULL local work size works. 
+TEST_P(NDRangeUpdateTest, ImplToUserDefinedLocalSize) { + + // Append a kernel node without setting the local work-size. + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, global_offset.data(), + global_size.data(), nullptr, 0, nullptr, 0, nullptr, nullptr, + &command_handle)); + ASSERT_NE(command_handle, nullptr); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + + // Run command-buffer prior to update an verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Can't validate the local size because it is generated by the + // implementation. + Validate(global_size, std::nullopt, global_offset); + + // Set local size and global offset to update to + std::array new_local_size = {4, 2, 2}; + std::array new_global_offset = {3, 2, 1}; + std::array new_global_size = global_size; + + // Set a user-defined local work-size in the update desc. 
ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, // hNewKernel - 0, // numNewMemObjArgs - 0, // numNewPointerArgs - 0, // numNewValueArgs - new_work_dim, // newWorkDim - nullptr, // pNewMemObjArgList - nullptr, // pNewPointerArgList - nullptr, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize + kernel, // hNewKernel + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + new_global_offset.data(), // pNewGlobalWorkOffset + new_global_size.data(), // pNewGlobalWorkSize + new_local_size.data(), // pNewLocalWorkSize }; - // Update command to command-buffer to use different work dim - ur_result_t result = - urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); - ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that the user defined local work-size was set correctly. + Validate(new_global_size, new_local_size, new_global_offset); +} + +// Test that setting `pNewLocalWorkSize` to a NULL value when the command was +// created with a non-NULL local work size works. +TEST_P(NDRangeUpdateTest, UserToImplDefinedLocalSize) { + + // Append a kernel node and set a user defined local work-size. 
+ ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, global_offset.data(), + global_size.data(), local_size.data(), 0, nullptr, 0, nullptr, nullptr, + &command_handle)); + ASSERT_NE(command_handle, nullptr); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate(global_size, local_size, global_offset); + + // Set local size and global offset to update to + std::array new_global_offset = {3, 2, 1}; + std::array new_global_size = global_size; + + // Do not set a local-work size in the update desc to let the implementation + // decide which local-work size should be used. + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, // hNewKernel + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + new_global_offset.data(), // pNewGlobalWorkOffset + new_global_size.data(), // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that the kernel ran successfully and the global size and the + // local size is unchanged + Validate(new_global_size, local_size, new_global_offset); } From 4b1a4d1891170c1ca89f2d9134182133bf2ffae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Tue, 10 Sep 2024 18:44:56 +0100 Subject: [PATCH 
14/14] OpenCL update and cleanup --- source/adapters/cuda/command_buffer.cpp | 26 ---------------- source/adapters/cuda/device.cpp | 1 - source/adapters/hip/command_buffer.cpp | 31 ------------------- source/adapters/hip/device.cpp | 24 +++++++------- source/adapters/level_zero/command_buffer.cpp | 21 +------------ source/adapters/level_zero/device.cpp | 16 ---------- source/adapters/opencl/command_buffer.cpp | 31 ++++++------------- source/adapters/opencl/command_buffer.hpp | 11 ++++--- source/adapters/opencl/device.cpp | 13 -------- .../update/ndrange_update.cpp | 14 +-------- 10 files changed, 29 insertions(+), 159 deletions(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index d9899b8f30..e5f2a7fcce 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -887,37 +887,11 @@ validateCommandDesc(ur_exp_command_buffer_command_handle_t Command, return UR_RESULT_ERROR_INVALID_OPERATION; } - // const uint32_t NewWorkDim = UpdateCommandDesc->newWorkDim; - // if (!NewWorkDim) { - // return UR_RESULT_ERROR_INVALID_OPERATION; - // } - - // if (NewWorkDim) { - // UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - // UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - if (UpdateCommandDesc->newWorkDim != Command->WorkDim && Command->Kernel == UpdateCommandDesc->hNewKernel) { return UR_RESULT_ERROR_INVALID_OPERATION; } - // // Error If Local size and not global size - // if ((UpdateCommandDesc->pNewLocalWorkSize != nullptr) && - // (UpdateCommandDesc->pNewGlobalWorkSize == nullptr)) { - // return UR_RESULT_ERROR_INVALID_OPERATION; - // } - - // // Error if local size non-nullptr and created with null - // // or if local size nullptr and created with non-null - // const bool IsNewLocalSizeNull = - // UpdateCommandDesc->pNewLocalWorkSize == nullptr; - // const bool IsOriginalLocalSizeNull = Command->isNullLocalSize(); - // - // if 
(IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { - // return UR_RESULT_ERROR_INVALID_OPERATION; - // } - // } - if (!Command->ValidKernelHandles.count(UpdateCommandDesc->hNewKernel)) { return UR_RESULT_ERROR_INVALID_VALUE; } diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index 7daf8bdbc8..bcdb1cdc85 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1093,7 +1093,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: - /*case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP:*/ return ReturnValue(true); case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 86552881d1..e7b84c32a7 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -18,7 +18,6 @@ #include "kernel.hpp" #include "memory.hpp" #include "queue.hpp" -#include #include @@ -873,41 +872,11 @@ validateCommandDesc(ur_exp_command_buffer_command_handle_t Command, return UR_RESULT_ERROR_INVALID_OPERATION; } - // const uint32_t NewWorkDim = UpdateCommandDesc->newWorkDim; - // if (!NewWorkDim && Command->Kernel != UpdateCommandDesc->hNewKernel) { - // return UR_RESULT_ERROR_INVALID_OPERATION; - // } - - // if (NewWorkDim) { - // UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - // UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - - std::cerr << "HERE" << std::endl; - std::cerr << UpdateCommandDesc->newWorkDim << std::endl; - std::cerr << Command->WorkDim << std::endl; - if (UpdateCommandDesc->newWorkDim != Command->WorkDim && Command->Kernel == UpdateCommandDesc->hNewKernel) { return UR_RESULT_ERROR_INVALID_OPERATION; } - // // Error If Local size and not global size 
- // if ((UpdateCommandDesc->pNewLocalWorkSize != nullptr) && - // (UpdateCommandDesc->pNewGlobalWorkSize == nullptr)) { - // return UR_RESULT_ERROR_INVALID_OPERATION; - // } - - // // Error if local size non-nullptr and created with null - // // or if local size nullptr and created with non-null - // const bool IsNewLocalSizeNull = - // UpdateCommandDesc->pNewLocalWorkSize == nullptr; - // const bool IsOriginalLocalSizeNull = Command->isNullLocalSize(); - // - // if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { - // return UR_RESULT_ERROR_INVALID_OPERATION; - // } - // } - if (!Command->ValidKernelHandles.count(UpdateCommandDesc->hNewKernel)) { return UR_RESULT_ERROR_INVALID_VALUE; } diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index b3b211af5a..ab5819eced 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -903,20 +903,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_IL_VERSION: case UR_DEVICE_INFO_ASYNC_BARRIER: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { + int DriverVersion = 0; + UR_CHECK_ERROR(hipDriverGetVersion(&DriverVersion)); - case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: - /*case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: */ { - int DriverVersion = 0; - UR_CHECK_ERROR(hipDriverGetVersion(&DriverVersion)); - - // Return supported for the UR command-buffer experimental feature on - // ROCM 5.5.1 and later. This is to workaround HIP driver bug - // https://github.com/ROCm/HIP/issues/2450 in older versions. - // - // The version is returned as (10000000 major + 1000000 minor + patch). - const int CmdBufDriverMinVersion = 50530202; // ROCM 5.5.1 - return ReturnValue(DriverVersion >= CmdBufDriverMinVersion); - } + // Return supported for the UR command-buffer experimental feature on + // ROCM 5.5.1 and later. 
This is to workaround HIP driver bug + // https://github.com/ROCm/HIP/issues/2450 in older versions. + // + // The version is returned as (10000000 major + 1000000 minor + patch). + const int CmdBufDriverMinVersion = 50530202; // ROCM 5.5.1 + return ReturnValue(DriverVersion >= CmdBufDriverMinVersion); + } case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { int DriverVersion = 0; UR_CHECK_ERROR(hipDriverGetVersion(&DriverVersion)); diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 9eee0d07e9..403195d511 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -1320,36 +1320,17 @@ ur_result_t validateCommandDesc( ->mutableCommandFlags; logger::debug("Mutable features supported by device {}", SupportedFeatures); - // kernel handle updates are not yet supported. + // Kernel handle updates are not yet supported. if (CommandDesc->hNewKernel != Command->Kernel) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - // uint32_t Dim = CommandDesc->newWorkDim; - // if (Dim != 0) { // Error if work dim changes if (CommandDesc->hNewKernel == Command->Kernel && CommandDesc->newWorkDim != Command->WorkDim) { return UR_RESULT_ERROR_INVALID_OPERATION; } - // // Error If Local size and not global size - // if ((CommandDesc->pNewLocalWorkSize != nullptr) && - // (CommandDesc->pNewGlobalWorkSize == nullptr)) { - // return UR_RESULT_ERROR_INVALID_OPERATION; - // } - - // // Error if local size non-nullptr and created with null - // // or if local size nullptr and created with non-null - // const bool IsNewLocalSizeNull = CommandDesc->pNewLocalWorkSize == - // nullptr; const bool IsOriginalLocalSizeNull = - // !Command->UserDefinedLocalSize; - // - // if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { - // return UR_RESULT_ERROR_INVALID_OPERATION; - // } - // } - // Check if new global offset is provided. 
size_t *NewGlobalWorkOffset = CommandDesc->pNewGlobalWorkOffset; UR_ASSERT(!NewGlobalWorkOffset || diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 463054c735..481cfec69c 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -994,22 +994,6 @@ ur_result_t urDeviceGetInfo( } case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: return ReturnValue(true); - // case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { - // // Update support requires being able to update kernel arguments and - // all - // // aspects of the kernel NDRange. - // const ze_mutable_command_exp_flags_t UpdateMask = - // ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS | - // ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT | - // ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE | - // ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET; - // - // const bool KernelArgUpdateSupport = - // (Device->ZeDeviceMutableCmdListsProperties->mutableCommandFlags & - // UpdateMask) == UpdateMask; - // return ReturnValue(KernelArgUpdateSupport && - // Device->Platform->ZeMutableCmdListExt.Supported); - // } case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { const bool ZeMutableCommandFlags = Device->ZeDeviceMutableCmdListsProperties->mutableCommandFlags; diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 571d14f5d8..76d68b6e37 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -178,7 +178,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( try { auto URCommandHandle = std::make_unique( - hCommandBuffer, CommandHandle, workDim, pLocalWorkSize != nullptr); + hCommandBuffer, CommandHandle, hKernel, workDim, + pLocalWorkSize != nullptr); *phCommandHandle = URCommandHandle.release(); hCommandBuffer->CommandHandles.push_back(*phCommandHandle); } catch (...) 
{ @@ -488,6 +489,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( const ur_exp_command_buffer_update_kernel_launch_desc_t *pUpdateKernelLaunch) { + // Kernel handle updates are not yet supported. + if (pUpdateKernelLaunch->hNewKernel != hCommand->Kernel) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + ur_exp_command_buffer_handle_t hCommandBuffer = hCommand->hCommandBuffer; cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); @@ -500,27 +506,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( if (!hCommandBuffer->IsFinalized || !hCommandBuffer->IsUpdatable) return UR_RESULT_ERROR_INVALID_OPERATION; - if (cl_uint NewWorkDim = pUpdateKernelLaunch->newWorkDim) { - // Error if work dim changes - if (NewWorkDim != hCommand->WorkDim) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - // Error If Local size and not global size - if ((pUpdateKernelLaunch->pNewLocalWorkSize != nullptr) && - (pUpdateKernelLaunch->pNewGlobalWorkSize == nullptr)) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - // Error if local size non-nullptr and created with null - // or if local size nullptr and created with non-null - const bool IsNewLocalSizeNull = - pUpdateKernelLaunch->pNewLocalWorkSize == nullptr; - const bool IsOriginalLocalSizeNull = !hCommand->UserDefinedLocalSize; - - if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } + if (pUpdateKernelLaunch->newWorkDim != hCommand->WorkDim) { + return UR_RESULT_ERROR_INVALID_OPERATION; } // Find the CL USM pointer arguments to the kernel to update diff --git a/source/adapters/opencl/command_buffer.hpp b/source/adapters/opencl/command_buffer.hpp index 4c39b1ad74..d8e975a3df 100644 --- a/source/adapters/opencl/command_buffer.hpp +++ b/source/adapters/opencl/command_buffer.hpp @@ -17,6 +17,8 @@ struct ur_exp_command_buffer_command_handle_t_ { ur_exp_command_buffer_handle_t hCommandBuffer; /// OpenCL 
command-handle. cl_mutable_command_khr CLMutableCommand; + /// Kernel associated with this command handle + ur_kernel_handle_t Kernel; /// Work-dimension the command was originally created with. cl_uint WorkDim; /// Set to true if the user set the local work size on command creation. @@ -31,11 +33,12 @@ struct ur_exp_command_buffer_command_handle_t_ { ur_exp_command_buffer_command_handle_t_( ur_exp_command_buffer_handle_t hCommandBuffer, - cl_mutable_command_khr CLMutableCommand, cl_uint WorkDim, - bool UserDefinedLocalSize) + cl_mutable_command_khr CLMutableCommand, ur_kernel_handle_t Kernel, + cl_uint WorkDim, bool UserDefinedLocalSize) : hCommandBuffer(hCommandBuffer), CLMutableCommand(CLMutableCommand), - WorkDim(WorkDim), UserDefinedLocalSize(UserDefinedLocalSize), - RefCountInternal(0), RefCountExternal(0) {} + Kernel(Kernel), WorkDim(WorkDim), + UserDefinedLocalSize(UserDefinedLocalSize), RefCountInternal(0), + RefCountExternal(0) {} uint32_t incrementInternalReferenceCount() noexcept { return ++RefCountInternal; diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index d8e0fc4e1f..7d7fd7c4ed 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -1065,19 +1065,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(ExtStr.find("cl_khr_command_buffer") != std::string::npos); } - // case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { - // cl_device_id Dev = cl_adapter::cast(hDevice); - // ur_device_command_buffer_update_capability_flags_t UpdateCapabilities; - // CL_RETURN_ON_FAILURE( - // deviceSupportsURCommandBufferKernelUpdate(Dev, - // UpdateCapabilities)); - // ur_device_command_buffer_update_capability_flags_t - // RequiredCapabilities = - // UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS | - // UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_WORKGROUP; - // return ReturnValue((UpdateCapabilities & 
RequiredCapabilities) == - // RequiredCapabilities); - // } case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { cl_device_id Dev = cl_adapter::cast(hDevice); ur_device_command_buffer_update_capability_flags_t UpdateCapabilities; diff --git a/test/conformance/exp_command_buffer/update/ndrange_update.cpp b/test/conformance/exp_command_buffer/update/ndrange_update.cpp index e2da3b4bf1..5d0a81d567 100644 --- a/test/conformance/exp_command_buffer/update/ndrange_update.cpp +++ b/test/conformance/exp_command_buffer/update/ndrange_update.cpp @@ -29,16 +29,6 @@ struct NDRangeUpdateTest std::memset(shared_ptr, 0, allocation_size); ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, shared_ptr)); - // - // // Add a 3 dimension kernel command to command-buffer and close - // // command-buffer - // ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - // updatable_cmd_buf_handle, kernel, n_dimensions, - // global_offset.data(), global_size.data(), local_size.data(), 0, - // nullptr, 0, nullptr, nullptr, &command_handle)); - // ASSERT_NE(command_handle, nullptr); - // - // ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); } // For each work-item the kernel prints the global id and local id in each @@ -47,16 +37,14 @@ struct NDRangeUpdateTest void Validate(std::array global_size, std::optional> local_size, std::array global_offset) { + // DPC++ swaps the X & Z dimension for 3 Dimensional kernels // between those set by user and SPIR-V builtins. // See `ReverseRangeDimensionsForKernel()` in commands.cpp - std::swap(global_size[0], global_size[2]); - if (local_size.has_value()) { std::swap(local_size.value()[0], local_size.value()[2]); } - std::swap(global_offset[0], global_offset[2]); // Verify global ID and local ID of each work item