From d90ab208ef9f253ac4a5e83608c9e3f5dcfc23bf Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 15 Nov 2023 15:56:54 +0000 Subject: [PATCH] [EXP][Command-Buffer] Add kernel command update This change introduces a new API that allows the kernel commands of a command-buffer to be updated with a new configuration, for example modified arguments or ND-Range. The new API is defined in the following files and then source generated using scripts, so reviewers should look at: * `scripts/core/EXP-COMMAND-BUFFER.rst` * `scripts/core/exp-command-buffer.yml` See [cl_khr_command_buffer_mutable_dispatch](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer_mutable_dispatch) as prior art. The differences between the proposed API and the above are: * Only the append kernel entry-point returns a command handle. I imagine this will be changed in future to enable other commands to be updated. * USM, buffer, and scalar arguments can be updated, but there is no equivalent update struct for `urKernelSetArgLocal` or `urKernelSetArgSampler`. * There is no granularity of optional support for update; an implementer must either implement all the ways to update a kernel configuration, or none of them. The CUDA adapter is the only adapter that currently implements this new feature; other adapters don't report support. This is because CUDA is already an adapter supported by UR command-buffers, and the CUDA API for updating nodes already exists as a non-optional feature. Reviewers should review the changes in `source/adapters/cuda/` to evaluate this. CTS tests are written to verify the implementation, as there is not yet a DPC++ feature with testing to stress the code path (see https://github.com/reble/llvm/pull/340 for how that feature could look). 
A new test directory has been created to test the command-buffer experimental feature, `test/conformance/exp_command_buffer`, which contains tests to stress using the feature defined by this extension so that it has code coverage. Reviewers should look at the new tests added here, and new device kernels in `test/conformance/device_code` to evaluate these changes. --- include/ur_api.h | 350 ++++++++++----- include/ur_ddi.h | 10 +- include/ur_print.hpp | 385 +++++++++++++++++ scripts/core/EXP-COMMAND-BUFFER.rst | 137 +++++- scripts/core/exp-command-buffer.yml | 321 +++++++++++--- scripts/core/registry.yml | 3 + scripts/parse_specs.py | 4 +- source/adapters/cuda/command_buffer.cpp | 139 +++++- source/adapters/cuda/command_buffer.hpp | 88 ++++ source/adapters/cuda/device.cpp | 4 + source/adapters/cuda/ur_interface_loader.cpp | 1 + source/adapters/hip/command_buffer.cpp | 11 +- source/adapters/hip/device.cpp | 4 + source/adapters/hip/ur_interface_loader.cpp | 1 + source/adapters/level_zero/command_buffer.cpp | 9 +- source/adapters/level_zero/device.cpp | 5 + .../level_zero/ur_interface_loader.cpp | 1 + source/adapters/native_cpu/command_buffer.cpp | 9 +- source/adapters/native_cpu/device.cpp | 5 + .../native_cpu/ur_interface_loader.cpp | 1 + source/adapters/null/ur_nullddi.cpp | 153 ++++--- source/adapters/opencl/command_buffer.cpp | 12 +- source/adapters/opencl/device.cpp | 6 + .../adapters/opencl/ur_interface_loader.cpp | 1 + source/loader/layers/tracing/ur_trcddi.cpp | 161 ++++--- source/loader/layers/validation/ur_valddi.cpp | 158 ++++--- source/loader/ur_ldrddi.cpp | 199 ++++++--- source/loader/ur_ldrddi.hpp | 6 + source/loader/ur_libapi.cpp | 190 +++++---- source/ur_api.cpp | 181 ++++---- test/conformance/CMakeLists.txt | 1 + test/conformance/device_code/CMakeLists.txt | 3 + test/conformance/device_code/indexers_usm.cpp | 38 ++ test/conformance/device_code/saxpy.cpp | 33 ++ test/conformance/device_code/saxpy_usm.cpp | 25 ++ .../exp_command_buffer/CMakeLists.txt | 
12 + .../buffer_fill_kernel_update.cpp | 397 ++++++++++++++++++ .../buffer_saxpy_kernel_update.cpp | 172 ++++++++ .../exp_command_buffer_adapter_cuda.match | 0 .../exp_command_buffer_adapter_hip.match | 13 + ...xp_command_buffer_adapter_level_zero.match | 0 .../exp_command_buffer_adapter_opencl.match | 0 .../exp_command_buffer/ndrange_update.cpp | 249 +++++++++++ .../usm_fill_kernel_update.cpp | 370 ++++++++++++++++ .../usm_saxpy_kernel_update.cpp | 158 +++++++ .../testing/include/uur/fixtures.h | 71 ++++ tools/urinfo/urinfo.hpp | 6 + 47 files changed, 3511 insertions(+), 592 deletions(-) create mode 100644 test/conformance/device_code/indexers_usm.cpp create mode 100644 test/conformance/device_code/saxpy.cpp create mode 100644 test/conformance/device_code/saxpy_usm.cpp create mode 100644 test/conformance/exp_command_buffer/CMakeLists.txt create mode 100644 test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp create mode 100644 test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp create mode 100644 test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match create mode 100644 test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match create mode 100644 test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero.match create mode 100644 test/conformance/exp_command_buffer/exp_command_buffer_adapter_opencl.match create mode 100644 test/conformance/exp_command_buffer/ndrange_update.cpp create mode 100644 test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp create mode 100644 test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp diff --git a/include/ur_api.h b/include/ur_api.h index 442c364e0c..aaf46c36f3 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -196,6 +196,7 @@ typedef enum ur_function_t { UR_FUNCTION_ADAPTER_RETAIN = 179, ///< Enumerator for ::urAdapterRetain UR_FUNCTION_ADAPTER_GET_LAST_ERROR = 180, ///< Enumerator for ::urAdapterGetLastError 
UR_FUNCTION_ADAPTER_GET_INFO = 181, ///< Enumerator for ::urAdapterGetInfo + UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP = 182, ///< Enumerator for ::urCommandBufferUpdateKernelLaunchExp UR_FUNCTION_PROGRAM_BUILD_EXP = 197, ///< Enumerator for ::urProgramBuildExp UR_FUNCTION_PROGRAM_COMPILE_EXP = 198, ///< Enumerator for ::urProgramCompileExp UR_FUNCTION_PROGRAM_LINK_EXP = 199, ///< Enumerator for ::urProgramLinkExp @@ -224,48 +225,53 @@ typedef enum ur_function_t { /////////////////////////////////////////////////////////////////////////////// /// @brief Defines structure types typedef enum ur_structure_type_t { - UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t - UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t - UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t - UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t - UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t - UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t - UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t - UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t - UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t - UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t - UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t - UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t - UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t - UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t - UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t - UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t - UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t - UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t - 
UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t - UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< ::ur_mem_native_properties_t - UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t - UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t - UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t - UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t - UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t - UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t - UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t - UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t - UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t - UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC = 35, ///< ::ur_usm_alloc_location_desc_t - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t - UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t - UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t - UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t - UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t - 
UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t - UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2005, ///< ::ur_exp_sampler_addr_modes_t + UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t + UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t + UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t + UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t + UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t + UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t + UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t + UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t + UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t + UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t + UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t + UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t + UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t + UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t + UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t + UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t + UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t + UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t + UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< ::ur_mem_native_properties_t + UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t + UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t + UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t + UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< 
::ur_program_native_properties_t + UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t + UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t + UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t + UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC = 35, ///< ::ur_usm_alloc_location_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC = 0x1001, ///< ::ur_exp_command_buffer_update_kernel_launch_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC = 0x1002, ///< ::ur_exp_command_buffer_update_memobj_arg_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC = 0x1003, ///< ::ur_exp_command_buffer_update_pointer_arg_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC = 0x1004, ///< ::ur_exp_command_buffer_update_value_arg_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC = 0x1005, ///< ::ur_exp_command_buffer_update_exec_info_desc_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t + UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t + UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, 
///< ::ur_exp_interop_semaphore_desc_t + UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t + UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2005, ///< ::ur_exp_sampler_addr_modes_t /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -483,6 +489,7 @@ typedef enum ur_result_t { UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP = 0x1000, ///< Invalid Command-Buffer UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP = 0x1001, ///< Sync point is not valid for the command-buffer UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP = 0x1002, ///< Sync point wait list is invalid + UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP = 0x1003, ///< Handle to command-buffer command is invalid UR_RESULT_ERROR_UNKNOWN = 0x7ffffffe, ///< Unknown or internal error /// @cond UR_RESULT_FORCE_UINT32 = 0x7fffffff @@ -1530,6 +1537,10 @@ typedef enum ur_device_info_t { ///< version than older devices. UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT = 114, ///< [::ur_bool_t] return true if the device supports virtual memory. UR_DEVICE_INFO_ESIMD_SUPPORT = 115, ///< [::ur_bool_t] return true if the device supports ESIMD. + UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000, ///< [::ur_bool_t] Returns true if the device supports the use of + ///< command-buffers. + UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP = 0x1001, ///< [::ur_bool_t] Returns true if the device supports updating the kernel + ///< commands in a command-buffer. 
UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP = 0x2000, ///< [::ur_bool_t] returns true if the device supports the creation of ///< bindless images UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP = 0x2001, ///< [::ur_bool_t] returns true if the device supports the creation of @@ -7764,9 +7775,90 @@ typedef struct ur_exp_command_buffer_desc_t { ur_structure_type_t stype; ///< [in] type of this structure, must be ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC const void *pNext; ///< [in][optional] pointer to extension-specific structure + ur_bool_t isUpdatable; ///< [in] Commands in a finalized command-buffer can be updated. } ur_exp_command_buffer_desc_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel command memobj argument. +typedef struct ur_exp_command_buffer_update_memobj_arg_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t argIndex; ///< [in] Argument index. + const ur_kernel_arg_mem_obj_properties_t *pProperties; ///< [in][optional] Pointer to memory object properties. + ur_mem_handle_t hArgValue; ///< [in][optional] Handle of memory object. + +} ur_exp_command_buffer_update_memobj_arg_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel command pointer argument. +typedef struct ur_exp_command_buffer_update_pointer_arg_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t argIndex; ///< [in] Argument index. 
+ const ur_kernel_arg_pointer_properties_t *pProperties; ///< [in][optional] Pointer to USM pointer properties. + const void *pArgValue; ///< [in][optional] USM pointer to memory location holding the argument + ///< value. + +} ur_exp_command_buffer_update_pointer_arg_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel command value argument. +typedef struct ur_exp_command_buffer_update_value_arg_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t argIndex; ///< [in] Argument index. + uint32_t argSize; ///< [in] Argument size. + const ur_kernel_arg_value_properties_t *pProperties; ///< [in][optional] Pointer to value properties. + const void *pArgValue; ///< [in][optional] Argument value representing kernel arg type. + +} ur_exp_command_buffer_update_value_arg_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating kernel command execution info. +typedef struct ur_exp_command_buffer_update_exec_info_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + ur_kernel_exec_info_t propName; ///< [in] Name of execution attribute. + size_t propSize; ///< [in] Size of execution attribute. + const ur_kernel_exec_info_properties_t *pProperties; ///< [in][optional] Pointer to execution info properties. + const void *pPropValue; ///< [in] Pointer to memory location holding the property value. 
+ +} ur_exp_command_buffer_update_exec_info_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel launch command. +typedef struct ur_exp_command_buffer_update_kernel_launch_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t numMemobjArgs; ///< [in] Length of pArgMemobjList. + uint32_t numPointerArgs; ///< [in] Length of pArgPointerList. + uint32_t numValueArgs; ///< [in] Length of pArgValueList. + uint32_t numExecInfos; ///< [in] Length of pArgExecInfoList. + uint32_t workDim; ///< [in] Number of work dimensions in the kernel ND-range, from 1-3. + const ur_exp_command_buffer_update_memobj_arg_desc_t *pArgMemobjList; ///< [in][optional][range(0, numMemobjArgs)] An array describing the new + ///< kernel mem obj arguments for the command. + const ur_exp_command_buffer_update_pointer_arg_desc_t *pArgPointerList; ///< [in][optional][range(0, numPointerArgs)] An array describing the new + ///< kernel pointer arguments for the command. + const ur_exp_command_buffer_update_value_arg_desc_t *pArgValueList; ///< [in][optional][range(0, numValueArgs)] An array describing the new + ///< kernel value arguments for the command. + const ur_exp_command_buffer_update_exec_info_desc_t *pArgExecInfoList; ///< [in][optional] An array describing the execution info objects for the + ///< command. + size_t *pGlobalWorkOffset; ///< [in][optional][range(0, workDim)] Array of workDim unsigned values + ///< that describe the offset used to calculate the global ID. + size_t *pGlobalWorkSize; ///< [in][optional][range(0, workDim)] Array of workDim unsigned values + ///< that describe the number of global work-items. 
+ size_t *pLocalWorkSize; ///< [in][optional][range(0, workDim)] Array of workDim unsigned values + ///< that describe the number of work-items that make up a work-group. If + ///< nullptr, the runtime implementation will choose the work-group size. + +} ur_exp_command_buffer_update_kernel_launch_desc_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief A value that identifies a command inside of a command-buffer, used for /// defining dependencies between commands in the same command-buffer. @@ -7776,11 +7868,15 @@ typedef uint32_t ur_exp_command_buffer_sync_point_t; /// @brief Handle of Command-Buffer object typedef struct ur_exp_command_buffer_handle_t_ *ur_exp_command_buffer_handle_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Handle of a Command-Buffer command +typedef struct ur_exp_command_buffer_command_handle_t_ *ur_exp_command_buffer_command_handle_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Create a Command-Buffer object /// /// @details -/// - Create a command-buffer object +/// - Create a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7798,10 +7894,10 @@ typedef struct ur_exp_command_buffer_handle_t_ *ur_exp_command_buffer_handle_t; /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object - const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor - ur_exp_command_buffer_handle_t *phCommandBuffer ///< [out] pointer to Command-Buffer handle + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. 
+ const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. + ur_exp_command_buffer_handle_t *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ); /////////////////////////////////////////////////////////////////////////////// @@ -7819,7 +7915,7 @@ urCommandBufferCreateExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp( - ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] handle of the command-buffer object + ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] Handle of the command-buffer object. ); /////////////////////////////////////////////////////////////////////////////// @@ -7838,7 +7934,7 @@ urCommandBufferRetainExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp( - ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] handle of the command-buffer object + ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] Handle of the command-buffer object. ); /////////////////////////////////////////////////////////////////////////////// @@ -7857,11 +7953,11 @@ urCommandBufferReleaseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp( - ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] handle of the command-buffer object + ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] Handle of the command-buffer object. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a kernel execution command to a command-buffer object +/// @brief Append a kernel execution command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7888,19 +7984,20 @@ urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t *pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t *pLocalWorkSize, ///< [in] Local work size to use when executing kernel. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM memcpy command to a command-buffer object +/// @brief Append a USM memcpy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7925,17 +8022,17 @@ urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. 
+ ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM fill command to a command-buffer object +/// @brief Append a USM fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7974,7 +8071,7 @@ urCommandBufferAppendUSMFillExp( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory copy command to a command-buffer object +/// @brief Append a memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7995,7 +8092,7 @@ urCommandBufferAppendUSMFillExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. 
@@ -8003,11 +8100,11 @@ urCommandBufferAppendMemBufferCopyExp( size_t size, ///< [in] The number of bytes to be copied. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory write command to a command-buffer object +/// @brief Append a memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8029,18 +8126,18 @@ urCommandBufferAppendMemBufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - const void *pSrc, ///< [in] pointer to host memory where data is to be written from. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + const void *pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory read command to a command-buffer object +/// @brief Append a memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8062,18 +8159,18 @@ urCommandBufferAppendMemBufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. 
); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory copy command to a command-buffer object +/// @brief Append a rectangular memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8094,7 +8191,7 @@ urCommandBufferAppendMemBufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t srcOrigin, ///< [in] Origin for the region of data to be copied from the source. @@ -8106,11 +8203,11 @@ urCommandBufferAppendMemBufferCopyRectExp( size_t dstSlicePitch, ///< [in] Slice pitch of the destination memory. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory write command to a command-buffer object +/// @brief Append a rectangular memory write command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8132,26 +8229,26 @@ urCommandBufferAppendMemBufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. - size_t bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. - size_t bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + size_t bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. + size_t bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. - size_t hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + size_t hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. - size_t hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + size_t hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. - void *pSrc, ///< [in] pointer to host memory where data is to be written from. + void *pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory read command to a command-buffer object +/// @brief Append a rectangular memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8173,25 +8270,25 @@ urCommandBufferAppendMemBufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. - size_t bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. - size_t bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. - size_t hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + size_t bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. + size_t bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. + size_t hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. 
- size_t hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + size_t hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory fill command to a command-buffer object +/// @brief Append a memory fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8227,7 +8324,7 @@ urCommandBufferAppendMemBufferFillExp( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Prefetch command to a command-buffer object +/// @brief Append a USM Prefetch command to a command-buffer object. /// /// @details /// - Prefetching may not be supported for all devices or allocation types. @@ -8268,7 +8365,7 @@ urCommandBufferAppendUSMPrefetchExp( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Advise command to a command-buffer object +/// @brief Append a USM Advise command to a command-buffer object. 
/// /// @details /// - Not all memory advice hints may be supported for all devices or @@ -8330,17 +8427,48 @@ urCommandBufferAppendUSMAdviseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_queue_handle_t hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_queue_handle_t hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. ); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Update a kernel launch command. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pUpdateKernelLaunch` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If update functionality is not supported by the device. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. 
+/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t *pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +); + #if !defined(__GNUC__) #pragma endregion #endif @@ -10500,6 +10628,7 @@ typedef struct ur_command_buffer_append_kernel_launch_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_kernel_launch_exp_params_t; /////////////////////////////////////////////////////////////////////////////// @@ -10693,6 +10822,15 @@ typedef struct ur_command_buffer_enqueue_exp_params_t { ur_event_handle_t **pphEvent; } ur_command_buffer_enqueue_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferUpdateKernelLaunchExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_update_kernel_launch_exp_params_t { + ur_exp_command_buffer_command_handle_t *phCommand; + const ur_exp_command_buffer_update_kernel_launch_desc_t **ppUpdateKernelLaunch; +} 
ur_command_buffer_update_kernel_launch_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urUsmP2PEnablePeerAccessExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 77f2f35f70..f86bafd7a1 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1854,7 +1854,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendKernelLaunchExp_t)( const size_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *); + ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferAppendUSMMemcpyExp @@ -2011,6 +2012,12 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferEnqueueExp_t)( const ur_event_handle_t *, ur_event_handle_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferUpdateKernelLaunchExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferUpdateKernelLaunchExp_t)( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Table of CommandBufferExp functions pointers typedef struct ur_command_buffer_exp_dditable_t { @@ -2031,6 +2038,7 @@ typedef struct ur_command_buffer_exp_dditable_t { ur_pfnCommandBufferAppendUSMPrefetchExp_t pfnAppendUSMPrefetchExp; ur_pfnCommandBufferAppendUSMAdviseExp_t pfnAppendUSMAdviseExp; ur_pfnCommandBufferEnqueueExp_t pfnEnqueueExp; + ur_pfnCommandBufferUpdateKernelLaunchExp_t pfnUpdateKernelLaunchExp; } ur_command_buffer_exp_dditable_t; /////////////////////////////////////////////////////////////////////////////// diff --git a/include/ur_print.hpp b/include/ur_print.hpp 
index 6b27b2a443..1f721203f4 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -58,6 +58,8 @@ template <> struct is_handle : std::true_type {}; template <> struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; template inline constexpr bool is_handle_v = is_handle::value; template @@ -319,6 +321,11 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_mem_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_semaphore_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_memobj_arg_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_pointer_arg_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_value_arg_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_exec_info_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_kernel_launch_desc_t params); inline std::ostream &operator<<(std::ostream &os, ur_exp_peer_info_t value); /////////////////////////////////////////////////////////////////////////////// @@ -822,6 +829,9 @@ inline std::ostream &operator<<(std::ostream &os, ur_function_t value) { case UR_FUNCTION_ADAPTER_GET_INFO: os << "UR_FUNCTION_ADAPTER_GET_INFO"; break; + case UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP"; + break; case UR_FUNCTION_PROGRAM_BUILD_EXP: os << 
"UR_FUNCTION_PROGRAM_BUILD_EXP"; break; @@ -999,6 +1009,21 @@ inline std::ostream &operator<<(std::ostream &os, ur_structure_type_t value) { case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC: os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC"; break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC"; + break; case UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES: os << "UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES"; break; @@ -1214,6 +1239,31 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { printPtr(os, pstruct); } break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC: { + const ur_exp_command_buffer_update_kernel_launch_desc_t *pstruct = (const ur_exp_command_buffer_update_kernel_launch_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC: { + const ur_exp_command_buffer_update_memobj_arg_desc_t *pstruct = (const ur_exp_command_buffer_update_memobj_arg_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC: { + const ur_exp_command_buffer_update_pointer_arg_desc_t *pstruct = (const ur_exp_command_buffer_update_pointer_arg_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case 
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC: { + const ur_exp_command_buffer_update_value_arg_desc_t *pstruct = (const ur_exp_command_buffer_update_value_arg_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC: { + const ur_exp_command_buffer_update_exec_info_desc_t *pstruct = (const ur_exp_command_buffer_update_exec_info_desc_t *)ptr; + printPtr(os, pstruct); + } break; + case UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES: { const ur_exp_sampler_mip_properties_t *pstruct = (const ur_exp_sampler_mip_properties_t *)ptr; printPtr(os, pstruct); @@ -1472,6 +1522,9 @@ inline std::ostream &operator<<(std::ostream &os, ur_result_t value) { case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: os << "UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP"; break; + case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP: + os << "UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP"; + break; case UR_RESULT_ERROR_UNKNOWN: os << "UR_RESULT_ERROR_UNKNOWN"; break; @@ -2401,6 +2454,12 @@ inline std::ostream &operator<<(std::ostream &os, ur_device_info_t value) { case UR_DEVICE_INFO_ESIMD_SUPPORT: os << "UR_DEVICE_INFO_ESIMD_SUPPORT"; break; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP"; + break; + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP"; + break; case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: os << "UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP"; break; @@ -3809,6 +3868,30 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ")"; } break; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } 
+ os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { @@ -9137,6 +9220,279 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu ur::details::printStruct(os, (params.pNext)); + os << ", "; + os << ".isUpdatable = "; + + os << (params.isUpdatable); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_memobj_arg_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_memobj_arg_desc_t params) { + os << "(struct ur_exp_command_buffer_update_memobj_arg_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".argIndex = "; + + os << (params.argIndex); + + os << ", "; + os << ".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".hArgValue = "; + + ur::details::printPtr(os, + (params.hArgValue)); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_pointer_arg_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_pointer_arg_desc_t params) { + os << "(struct 
ur_exp_command_buffer_update_pointer_arg_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".argIndex = "; + + os << (params.argIndex); + + os << ", "; + os << ".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".pArgValue = "; + + os << (params.pArgValue); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_value_arg_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_value_arg_desc_t params) { + os << "(struct ur_exp_command_buffer_update_value_arg_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".argIndex = "; + + os << (params.argIndex); + + os << ", "; + os << ".argSize = "; + + os << (params.argSize); + + os << ", "; + os << ".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".pArgValue = "; + + os << (params.pArgValue); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_exec_info_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_exec_info_desc_t params) { + os << "(struct ur_exp_command_buffer_update_exec_info_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".propName = "; + + os << (params.propName); + + os << ", "; + os << ".propSize = "; + + os << (params.propSize); + + os << ", "; + os << ".pProperties = "; + + os << 
(params.pProperties); + + os << ", "; + os << ".pPropValue = "; + + os << (params.pPropValue); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_kernel_launch_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_kernel_launch_desc_t params) { + os << "(struct ur_exp_command_buffer_update_kernel_launch_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".numMemobjArgs = "; + + os << (params.numMemobjArgs); + + os << ", "; + os << ".numPointerArgs = "; + + os << (params.numPointerArgs); + + os << ", "; + os << ".numValueArgs = "; + + os << (params.numValueArgs); + + os << ", "; + os << ".numExecInfos = "; + + os << (params.numExecInfos); + + os << ", "; + os << ".workDim = "; + + os << (params.workDim); + + os << ", "; + os << ".pArgMemobjList = {"; + for (size_t i = 0; (params.pArgMemobjList) != NULL && i < params.numMemobjArgs; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pArgMemobjList))[i]; + } + os << "}"; + + os << ", "; + os << ".pArgPointerList = {"; + for (size_t i = 0; (params.pArgPointerList) != NULL && i < params.numPointerArgs; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pArgPointerList))[i]; + } + os << "}"; + + os << ", "; + os << ".pArgValueList = {"; + for (size_t i = 0; (params.pArgValueList) != NULL && i < params.numValueArgs; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pArgValueList))[i]; + } + os << "}"; + + os << ", "; + os << ".pArgExecInfoList = "; + + ur::details::printPtr(os, + (params.pArgExecInfoList)); + + os << ", "; + os << ".pGlobalWorkOffset = {"; + for (size_t i = 0; (params.pGlobalWorkOffset) != NULL && i < params.workDim; ++i) { + if (i != 0) { + os << ", "; 
+ } + + os << ((params.pGlobalWorkOffset))[i]; + } + os << "}"; + + os << ", "; + os << ".pGlobalWorkSize = {"; + for (size_t i = 0; (params.pGlobalWorkSize) != NULL && i < params.workDim; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pGlobalWorkSize))[i]; + } + os << "}"; + + os << ", "; + os << ".pLocalWorkSize = {"; + for (size_t i = 0; (params.pLocalWorkSize) != NULL && i < params.workDim; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pLocalWorkSize))[i]; + } + os << "}"; + os << "}"; return os; } @@ -14540,6 +14896,12 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".phCommand = "; + + ur::details::printPtr(os, + *(params->pphCommand)); + return os; } @@ -15239,6 +15601,26 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_update_kernel_launch_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_update_kernel_launch_exp_params_t *params) { + + os << ".hCommand = "; + + ur::details::printPtr(os, + *(params->phCommand)); + + os << ", "; + os << ".pUpdateKernelLaunch = "; + + ur::details::printPtr(os, + *(params->ppUpdateKernelLaunch)); + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_usm_p2p_enable_peer_access_exp_params_t type /// @returns @@ -16373,6 +16755,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_ case UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP: { os << (const struct ur_command_buffer_enqueue_exp_params_t *)params; } break; + case UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP: { + os << (const struct 
ur_command_buffer_update_kernel_launch_exp_params_t *)params; + } break; case UR_FUNCTION_USM_P2P_ENABLE_PEER_ACCESS_EXP: { os << (const struct ur_usm_p2p_enable_peer_access_exp_params_t *)params; } break; diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index a6a32a66a1..afe081f7e2 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -57,24 +57,29 @@ returned list of supported extensions. ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, 0, nullptr, &returnedSize); - // Retrieve extension string + // Retrieve extension string std::unique_ptr returnedExtensions(new char[returnedSize]); - ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, returnedSize, + ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, returnedSize, returnedExtensions.get(), nullptr); - + std::string_view ExtensionsString(returnedExtensions.get()); - bool CmdBufferSupport = + bool CmdBufferSupport = ExtensionsString.find(${X}_COMMAND_BUFFER_EXTENSION_STRING_EXP) != std::string::npos; +.. note:: + The ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP device info query exists to + serve the same purpose as ${X}_COMMAND_BUFFER_EXTENSION_STRING_EXP. + Command-Buffer Creation -------------------------------------------------------------------------------- Command-Buffers are tied to a specific ${x}_context_handle_t and ${x}_device_handle_t. ${x}CommandBufferCreateExp optionally takes a descriptor to provide additional properties for how the command-buffer should be -constructed. There are currently no unique members defined for -${x}_exp_command_buffer_desc_t, however they may be added in the future. +constructed. The only unique member defined in ${x}_exp_command_buffer_desc_t +is ``isUpdatable``, which should be set to ``true`` to support :ref:`updating +command-buffer commands`. 
Command-buffers are reference counted and can be retained and released by calling ${x}CommandBufferRetainExp and ${x}CommandBufferReleaseExp respectively. @@ -89,6 +94,11 @@ However, they differ in that they take a command-buffer handle instead of a queue handle, and the dependencies and return parameters are sync-points instead of event handles. +The entry-point for appending a kernel launch command also returns an optional +handle to the command being appended. Returning this handle does not extend the +lifetime of the parent command-buffer, and using the handle after the +command-buffer has been destroyed is invalid behaviour. + Currently only the following commands are supported: * ${x}CommandBufferAppendKernelLaunchExp @@ -103,9 +113,9 @@ Currently only the following commands are supported: * ${x}CommandBufferAppendMemBufferFillExp * ${x}CommandBufferAppendUSMPrefetchExp * ${x}CommandBufferAppendUSMAdviseExp - + It is planned to eventually support any command type from the Core API which can -actually be appended to the equiavalent adapter native constructs. +actually be appended to the equivalent adapter native constructs. Sync-Points -------------------------------------------------------------------------------- @@ -122,15 +132,15 @@ were obtained from. 
// Append a memcpy with no sync-point dependencies ${x}_exp_command_buffer_sync_point_t syncPoint; - ${x}CommandBufferAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, 0, + ${x}CommandBufferAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, 0, nullptr, &syncPoint); - + // Append a kernel launch with syncPoint as a dependency, ignore returned // sync-point - ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, - pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, 1, &syncPoint, - nullptr); + ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, + pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, 1, &syncPoint, + nullptr, nullptr); Enqueueing Command-Buffers -------------------------------------------------------------------------------- @@ -147,6 +157,82 @@ enqueued or executed simultaneously, and submissions may be serialized. ${x}CommandBufferEnqueueExp(hCommandBuffer, hQueue, 0, nullptr, &executionEvent); +Updating Command-Buffers Commands +-------------------------------------------------------------------------------- + +An adapter implementing the command-buffer experimental feature can optionally +support updating the configuration of kernel commands recorded to a +command-buffer. Support for this is reported by returning true in the +${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP query. + +Updating kernel commands is done by passing the new kernel configuration +to ${x}CommandBufferUpdateKernelLaunchExp along with the command handle of +the kernel command to update. Configurations that can be changed are the +parameters to the kernel and the execution ND-Range. + +.. parsed-literal:: + + // Create a command-buffer with update enabled. 
+ ${x}_exp_command_buffer_desc_t desc { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, + nullptr, + true // isUpdatable + }; + ${x}_exp_command_buffer_handle_t hCommandBuffer; + ${x}CommandBufferCreateExp(hContext, hDevice, &desc, &hCommandBuffer); + + // Append a kernel command which has two buffer parameters, an input + // and an output. + ${x}_exp_command_buffer_command_handle_t hCommand; + ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, + pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, 0, nullptr, + nullptr, &hCommand); + + // Close the command-buffer before updating + ${x}CommandBufferFinalizeExp(hCommandBuffer); + + // Define kernel argument at index 0 to be a new input buffer object + ${x}_exp_command_buffer_update_memobj_arg_desc_t newInputArg { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext, + 0, // argIndex, + nullptr, // pProperties + newInputBuffer, // hArgValue + }; + + // Define kernel argument at index 1 to be a new output buffer object + ${x}_exp_command_buffer_update_memobj_arg_desc_t newOutputArg { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext, + 1, // argIndex, + nullptr, // pProperties + newOutputBuffer, // hArgValue + }; + + // Define the new configuration of the kernel command + ${x}_exp_command_buffer_update_memobj_arg_desc_t updatedArgs[2] = {newInputArg, newOutputArg}; + ${x}_exp_command_buffer_update_kernel_launch_desc_t update { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 2, // numMemobjArgs + 0, // numPointerArgs + 0, // numValueArgs + 0, // numExecInfos + 0, // workDim; + updatedArgs, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + // Perform the update + ${x}CommandBufferUpdateKernelLaunchExp(hCommand,
&update); + + API -------------------------------------------------------------------------------- @@ -156,12 +242,21 @@ Macros Enums ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}_device_info_t + * ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP + * ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP * ${x}_result_t * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP + * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP * ${x}_structure_type_t * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC * ${x}_command_t * ${X}_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP * ${x}_function_t @@ -182,15 +277,19 @@ Enums * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP - - + * ${X}_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP Types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * ${x}_exp_command_buffer_desc_t +* ${x}_exp_command_buffer_update_kernel_launch_desc_t +* ${x}_exp_command_buffer_update_memobj_arg_desc_t +* ${x}_exp_command_buffer_update_pointer_arg_desc_t +* ${x}_exp_command_buffer_update_value_arg_desc_t +* ${x}_exp_command_buffer_update_exec_info_desc_t * ${x}_exp_command_buffer_sync_point_t * ${x}_exp_command_buffer_handle_t - +* ${x}_exp_command_buffer_command_handle_t Functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -211,6 +310,7 @@ Functions * ${x}CommandBufferAppendUSMPrefetchExp * 
${x}CommandBufferAppendUSMAdviseExp * ${x}CommandBufferEnqueueExp +* ${x}CommandBufferUpdateKernelLaunchExp Changelog -------------------------------------------------------------------------------- @@ -227,6 +327,8 @@ Changelog | 1.3 | Add function definitions for Prefetch and Advise | | | commands | +-----------+-------------------------------------------------------+ +| 1.4 | Add function definitions for kernel command update | ++-----------+-------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- @@ -234,3 +336,4 @@ Contributors * Ben Tracy `ben.tracy@codeplay.com `_ * Ewan Crawford `ewan@codeplay.com `_ * Maxime France-Pillois `maxime.francepillois@codeplay.com `_ +* Aaron Greig `aaron.greig@codeplay.com `_ diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 7d1b686aab..568b4896e0 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -11,6 +11,20 @@ type: header desc: "Intel $OneApi Unified Runtime Experimental APIs for Command-Buffers" ordinal: "99" +--- #-------------------------------------------------------------------------- +type: enum +extend: true +typed_etors: true +desc: "Extension enums to $x_device_info_t to support command-buffers." +name: $x_device_info_t +etors: + - name: COMMAND_BUFFER_SUPPORT_EXP + value: "0x1000" + desc: "[$x_bool_t] Returns true if the device supports the use of command-buffers." + - name: COMMAND_BUFFER_UPDATE_SUPPORT_EXP + value: "0x1001" + desc: "[$x_bool_t] Returns true if the device supports updating the kernel commands in a command-buffer."
+ --- #-------------------------------------------------------------------------- type: enum extend: true @@ -26,6 +40,9 @@ etors: - name: ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP value: "0x1002" desc: "Sync point wait list is invalid" + - name: ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + value: "0x1003" + desc: "Handle to command-buffer command is invalid" --- #-------------------------------------------------------------------------- type: enum extend: true @@ -35,6 +52,22 @@ etors: - name: EXP_COMMAND_BUFFER_DESC desc: $x_exp_command_buffer_desc_t value: "0x1000" + - name: EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC + desc: $x_exp_command_buffer_update_kernel_launch_desc_t + value: "0x1001" + - name: EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC + desc: $x_exp_command_buffer_update_memobj_arg_desc_t + value: "0x1002" + - name: EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + desc: $x_exp_command_buffer_update_pointer_arg_desc_t + value: "0x1003" + - name: EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + desc: $x_exp_command_buffer_update_value_arg_desc_t + value: "0x1004" + - name: EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC + desc: $x_exp_command_buffer_update_exec_info_desc_t + value: "0x1005" + --- #-------------------------------------------------------------------------- type: enum extend: true @@ -54,7 +87,120 @@ type: struct desc: "Command-Buffer Descriptor Type" name: $x_exp_command_buffer_desc_t base: $x_base_desc_t -members: [] +members: + - type: $x_bool_t + name: isUpdatable + desc: "[in] Commands in a finalized command-buffer can be updated." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel command memobj argument." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_memobj_arg_desc_t +members: + - type: uint32_t + name: argIndex + desc: "[in] Argument index." 
+ - type: "const ur_kernel_arg_mem_obj_properties_t *" + name: pProperties + desc: "[in][optional] Pointer to memory object properties." + - type: $x_mem_handle_t + name: hArgValue + desc: "[in][optional] Handle of memory object." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel command pointer argument." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_pointer_arg_desc_t +members: + - type: uint32_t + name: argIndex + desc: "[in] Argument index." + - type: "const ur_kernel_arg_pointer_properties_t *" + name: pProperties + desc: "[in][optional] Pointer to USM pointer properties." + - type: "const void *" + name: pArgValue + desc: "[in][optional] USM pointer to memory location holding the argument value." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel command value argument." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_value_arg_desc_t +members: + - type: uint32_t + name: argIndex + desc: "[in] Argument index." + - type: uint32_t + name: argSize + desc: "[in] Argument size." + - type: "const ur_kernel_arg_value_properties_t *" + name: pProperties + desc: "[in][optional] Pointer to value properties." + - type: "const void *" + name: pArgValue + desc: "[in][optional] Argument value representing kernel arg type." + +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating kernel command execution info." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_exec_info_desc_t +members: + - type: ur_kernel_exec_info_t + name: propName + desc: "[in] Name of execution attribute." + - type: size_t + name: propSize + desc: "[in] Size of execution attribute." + - type: "const ur_kernel_exec_info_properties_t *" + name: pProperties + desc: "[in][optional] Pointer to execution info properties."
+ - type: "const void *" + name: pPropValue + desc: "[in] Pointer to memory location holding the property value." + +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel launch command." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_kernel_launch_desc_t +members: + - type: uint32_t + name: numMemobjArgs + desc: "[in] Length of pArgMemobjList." + - type: uint32_t + name: numPointerArgs + desc: "[in] Length of pArgPointerList." + - type: uint32_t + name: numValueArgs + desc: "[in] Length of pArgValueList." + - type: uint32_t + name: numExecInfos + desc: "[in] Length of pArgExecInfoList." + - type: uint32_t + name: workDim + desc: "[in] Number of work dimensions in the kernel ND-range, from 1-3." + - type: "const $x_exp_command_buffer_update_memobj_arg_desc_t*" + name: pArgMemobjList + desc: "[in][optional][range(0, numMemobjArgs)] An array describing the new kernel mem obj arguments for the command." + - type: "const $x_exp_command_buffer_update_pointer_arg_desc_t*" + name: pArgPointerList + desc: "[in][optional][range(0, numPointerArgs)] An array describing the new kernel pointer arguments for the command." + - type: "const $x_exp_command_buffer_update_value_arg_desc_t*" + name: pArgValueList + desc: "[in][optional][range(0, numValueArgs)] An array describing the new kernel value arguments for the command." + - type: "const $x_exp_command_buffer_update_exec_info_desc_t*" + name: pArgExecInfoList + desc: "[in][optional] An array describing the execution info objects for the command." + - type: "size_t*" + name: pGlobalWorkOffset + desc: "[in][optional][range(0, workDim)] Array of workDim unsigned values that describe the offset used to calculate the global ID." + - type: "size_t*" + name: pGlobalWorkSize + desc: "[in][optional][range(0, workDim)] Array of workDim unsigned values that describe the number of global work-items."
+ - type: "size_t*" + name: pLocalWorkSize + desc: "[in][optional][range(0, workDim)] Array of workDim unsigned values that describe the number of work-items that make up a work-group. If nullptr, the runtime implementation will choose the work-group size." --- #-------------------------------------------------------------------------- type: typedef desc: "A value that identifies a command inside of a command-buffer, used for defining dependencies between commands in the same command-buffer." @@ -67,26 +213,31 @@ desc: "Handle of Command-Buffer object" class: $xCommandBuffer name: "$x_exp_command_buffer_handle_t" --- #-------------------------------------------------------------------------- +type: handle +desc: "Handle of a Command-Buffer command" +class: $xCommandBuffer +name: "$x_exp_command_buffer_command_handle_t" +--- #-------------------------------------------------------------------------- type: function desc: "Create a Command-Buffer object" class: $xCommandBuffer name: CreateExp decl: static details: - - "Create a command-buffer object" + - "Create a command-buffer object." params: - type: $x_context_handle_t name: hContext - desc: "[in] handle of the context object" + desc: "[in] Handle of the context object." - type: $x_device_handle_t name: hDevice - desc: "[in] handle of the device object" + desc: "[in] Handle of the device object." - type: "const $x_exp_command_buffer_desc_t*" name: pCommandBufferDesc - desc: "[in][optional] CommandBuffer descriptor" + desc: "[in][optional] command-buffer descriptor." - type: "$x_exp_command_buffer_handle_t*" name: phCommandBuffer - desc: "[out] pointer to Command-Buffer handle" + desc: "[out] Pointer to command-Buffer handle." returns: - $X_RESULT_ERROR_INVALID_CONTEXT - $X_RESULT_ERROR_INVALID_DEVICE @@ -100,7 +251,7 @@ name: RetainExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." 
returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -113,7 +264,7 @@ name: ReleaseExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -126,26 +277,26 @@ name: FinalizeExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a kernel execution command to a command-buffer object" +desc: "Append a kernel execution command to a command-buffer object." class: $xCommandBuffer name: AppendKernelLaunchExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." - type: $x_kernel_handle_t name: hKernel - desc: "[in] kernel to append" + desc: "[in] Kernel to append." - type: uint32_t name: workDim - desc: "[in] dimension of the kernel execution" + desc: "[in] Dimension of the kernel execution." - type: "const size_t*" name: pGlobalWorkOffset desc: "[in] Offset to use when executing kernel." @@ -163,7 +314,11 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." + - type: "$x_exp_command_buffer_command_handle_t*" + name: phCommand + desc: "[out][optional] Handle to this command." 
+ returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_KERNEL @@ -178,13 +333,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM memcpy command to a command-buffer object" +desc: "Append a USM memcpy command to a command-buffer object." class: $xCommandBuffer name: AppendUSMMemcpyExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: "void*" name: pDst desc: "[in] Location the data will be copied to." @@ -193,7 +348,7 @@ params: desc: "[in] The data to be copied." - type: "size_t" name: size - desc: "[in] The number of bytes to copy" + desc: "[in] The number of bytes to copy." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -202,7 +357,8 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." + returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_SIZE: @@ -217,7 +373,7 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM fill command to a command-buffer object" +desc: "Append a USM fill command to a command-buffer object." class: $xCommandBuffer name: AppendUSMFillExp params: @@ -262,13 +418,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory copy command to a command-buffer object" +desc: "Append a memory copy command to a command-buffer object." 
class: $xCommandBuffer name: AppendMemBufferCopyExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hSrcMem desc: "[in] The data to be copied." @@ -292,7 +448,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -304,25 +460,25 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory write command to a command-buffer object" +desc: "Append a memory write command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferWriteExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: "size_t" name: offset - desc: "[in] offset in bytes in the buffer object." + desc: "[in] Offset in bytes in the buffer object." - type: "size_t" name: size - desc: "[in] size in bytes of data being written." + desc: "[in] Size in bytes of data being written." - type: "const void*" name: pSrc - desc: "[in] pointer to host memory where data is to be written from." + desc: "[in] Pointer to host memory where data is to be written from." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." 
@@ -331,7 +487,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -343,25 +499,25 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory read command to a command-buffer object" +desc: "Append a memory read command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferReadExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: "size_t" name: offset - desc: "[in] offset in bytes in the buffer object." + desc: "[in] Offset in bytes in the buffer object." - type: "size_t" name: size - desc: "[in] size in bytes of data being written." + desc: "[in] Size in bytes of data being written." - type: "void*" name: pDst - desc: "[in] pointer to host memory where data is to be written to." + desc: "[in] Pointer to host memory where data is to be written to." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -370,7 +526,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." 
returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -382,13 +538,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a rectangular memory copy command to a command-buffer object" +desc: "Append a rectangular memory copy command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferCopyRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hSrcMem desc: "[in] The data to be copied." @@ -424,7 +580,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -436,16 +592,16 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a rectangular memory write command to a command-buffer object" +desc: "Append a rectangular memory write command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferWriteRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: $x_rect_offset_t name: bufferOffset desc: "[in] 3D offset in the buffer." @@ -457,19 +613,19 @@ params: desc: "[in] 3D rectangular region descriptor: width, height, depth." 
- type: "size_t" name: bufferRowPitch - desc: "[in] length of each row in bytes in the buffer object." + desc: "[in] Length of each row in bytes in the buffer object." - type: "size_t" name: bufferSlicePitch - desc: "[in] length of each 2D slice in bytes in the buffer object being written." + desc: "[in] Length of each 2D slice in bytes in the buffer object being written." - type: "size_t" name: hostRowPitch - desc: "[in] length of each row in bytes in the host memory region pointed to by pSrc." + desc: "[in] Length of each row in bytes in the host memory region pointed to by pSrc." - type: "size_t" name: hostSlicePitch - desc: "[in] length of each 2D slice in bytes in the host memory region pointed to by pSrc." + desc: "[in] Length of each 2D slice in bytes in the host memory region pointed to by pSrc." - type: "void*" name: pSrc - desc: "[in] pointer to host memory where data is to be written from." + desc: "[in] Pointer to host memory where data is to be written from." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -478,7 +634,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -490,16 +646,16 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a rectangular memory read command to a command-buffer object" +desc: "Append a rectangular memory read command to a command-buffer object." 
class: $xCommandBuffer name: AppendMemBufferReadRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: $x_rect_offset_t name: bufferOffset desc: "[in] 3D offset in the buffer." @@ -511,19 +667,19 @@ params: desc: "[in] 3D rectangular region descriptor: width, height, depth." - type: "size_t" name: bufferRowPitch - desc: "[in] length of each row in bytes in the buffer object." + desc: "[in] Length of each row in bytes in the buffer object." - type: "size_t" name: bufferSlicePitch - desc: "[in] length of each 2D slice in bytes in the buffer object being read." + desc: "[in] Length of each 2D slice in bytes in the buffer object being read." - type: "size_t" name: hostRowPitch - desc: "[in] length of each row in bytes in the host memory region pointed to by pDst." + desc: "[in] Length of each row in bytes in the host memory region pointed to by pDst." - type: "size_t" name: hostSlicePitch - desc: "[in] length of each 2D slice in bytes in the host memory region pointed to by pDst." + desc: "[in] Length of each 2D slice in bytes in the host memory region pointed to by pDst." - type: "void*" name: pDst - desc: "[in] pointer to host memory where data is to be read into." + desc: "[in] Pointer to host memory where data is to be read into." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -532,7 +688,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." 
returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -544,7 +700,7 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory fill command to a command-buffer object" +desc: "Append a memory fill command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferFillExp params: @@ -588,12 +744,12 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM Prefetch command to a command-buffer object" +desc: "Append a USM Prefetch command to a command-buffer object." class: $xCommandBuffer name: AppendUSMPrefetchExp details: - - "Prefetching may not be supported for all devices or allocation types. If memory prefetching - is not supported, the prefetch hint will be ignored." + - "Prefetching may not be supported for all devices or allocation types. If + memory prefetching is not supported, the prefetch hint will be ignored." params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -630,12 +786,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM Advise command to a command-buffer object" +desc: "Append a USM Advise command to a command-buffer object." class: $xCommandBuffer name: AppendUSMAdviseExp details: - - "Not all memory advice hints may be supported for all devices or allocation types. - If a memory advice hint is not supported, it will be ignored." + - "Not all memory advice hints may be supported for all devices or + allocation types. If a memory advice hint is not supported, it will be + ignored." 
params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -678,18 +835,18 @@ name: EnqueueExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_queue_handle_t name: hQueue - desc: "[in] the queue to submit this command-buffer for execution." + desc: "[in] The queue to submit this command-buffer for execution." - type: uint32_t name: numEventsInWaitList - desc: "[in] size of the event wait list" + desc: "[in] Size of the event wait list." - type: "const $x_event_handle_t*" name: phEventWaitList desc: | [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command-buffer execution. - If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events. - type: $x_event_handle_t* name: phEvent desc: | @@ -704,3 +861,31 @@ returns: - "If event objects in phEventWaitList are not valid events." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function +desc: "Update a kernel launch command." +class: $xCommandBuffer +name: UpdateKernelLaunchExp +params: + - type: $x_exp_command_buffer_command_handle_t + name: hCommand + desc: "[in] Handle of the command-buffer kernel command to update." + - type: "const $x_exp_command_buffer_update_kernel_launch_desc_t*" + name: pUpdateKernelLaunch + desc: "[in] Struct defining how the kernel command is to be updated." + +returns: + - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If update functionality is not supported by the device." + - $X_RESULT_ERROR_INVALID_OPERATION: + - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." 
+ - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX + - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE + - $X_RESULT_ERROR_INVALID_ENUMERATION + - $X_RESULT_ERROR_INVALID_WORK_DIMENSION + - $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE + - $X_RESULT_ERROR_INVALID_VALUE + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index 6195cd4980..d0dece9e2d 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -502,6 +502,9 @@ etors: - name: ADAPTER_GET_INFO desc: Enumerator for $xAdapterGetInfo value: '181' +- name: COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP + desc: Enumerator for $xCommandBufferUpdateKernelLaunchExp + value: '182' - name: PROGRAM_BUILD_EXP desc: Enumerator for $xProgramBuildExp value: '197' diff --git a/scripts/parse_specs.py b/scripts/parse_specs.py index 332af88cc7..493b6ddc88 100644 --- a/scripts/parse_specs.py +++ b/scripts/parse_specs.py @@ -286,8 +286,8 @@ def __validate_members(d, tags): if not annotation: raise Exception(prefix+"'desc' must start with {'[in]', '[out]', '[in,out]'}") - if type_traits.is_handle(item['type']): - raise Exception(prefix+"'type' must not be '*_handle_t': %s"%item['type']) + #if type_traits.is_handle(item['type']): + # raise Exception(prefix+"'type' must not be '*_handle_t': %s"%item['type']) if item['type'].endswith("flag_t"): raise Exception(prefix+"'type' must not be '*_flag_t': %s"%item['type']) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index a65530a1f1..9e29dd7f5a 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -250,7 +250,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numSyncPointsInWaitList, const 
ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_exp_command_buffer_sync_point_t *pSyncPoint, + ur_exp_command_buffer_command_handle_t *phCommand) { // Preconditions UR_ASSERT(hCommandBuffer->Context == hKernel->getContext(), UR_RESULT_ERROR_INVALID_KERNEL); @@ -324,8 +325,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( hKernel->clearLocalSize(); // Get sync point and register the cuNode with it. - *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + auto NodeSP = std::make_shared(GraphNode); + if (pSyncPoint) { + *pSyncPoint = hCommandBuffer->AddSyncPoint(NodeSP); + } + + *phCommand = hCommandBuffer + ->AddCommandHandle(hKernel, NodeSP, NodeParams, workDim, + pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize) + .get(); } catch (ur_result_t Err) { Result = Err; } @@ -762,3 +771,127 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return Result; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, + const ur_exp_command_buffer_update_kernel_launch_desc_t *pKernelLaunch) { + // Kernel corresponding to the command to update + ur_kernel_handle_t Kernel = hCommand->Kernel; + + // Update pointer arguments to the kernel + uint32_t NumPointerArgs = pKernelLaunch->numPointerArgs; + const ur_exp_command_buffer_update_pointer_arg_desc_t *ArgPointerList = + pKernelLaunch->pArgPointerList; + for (uint32_t i = 0; i < NumPointerArgs; i++) { + const auto &PointerArgDesc = ArgPointerList[i]; + uint32_t ArgIndex = PointerArgDesc.argIndex; + const void *ArgValue = PointerArgDesc.pArgValue; + + ur_result_t Result = UR_RESULT_SUCCESS; + try { + Kernel->setKernelArg(ArgIndex, sizeof(ArgValue), ArgValue); + } catch (ur_result_t Err) { + Result = Err; + return Result; + } + } + + // Update memobj arguments to the kernel + uint32_t NumMemobjArgs = pKernelLaunch->numMemobjArgs; 
+ const ur_exp_command_buffer_update_memobj_arg_desc_t *ArgMemobjList = + pKernelLaunch->pArgMemobjList; + for (uint32_t i = 0; i < NumMemobjArgs; i++) { + const auto &MemobjArgDesc = ArgMemobjList[i]; + uint32_t ArgIndex = MemobjArgDesc.argIndex; + ur_mem_handle_t ArgValue = MemobjArgDesc.hArgValue; + + ur_result_t Result = UR_RESULT_SUCCESS; + try { + if (ArgValue == nullptr) { + Kernel->setKernelArg(ArgIndex, 0, nullptr); + } else { + CUdeviceptr CuPtr = std::get(ArgValue->Mem).get(); + Kernel->setKernelArg(ArgIndex, sizeof(CUdeviceptr), (void *)&CuPtr); + } + } catch (ur_result_t Err) { + Result = Err; + return Result; + } + } + + // Update value arguments to the kernel + uint32_t NumValueArgs = pKernelLaunch->numValueArgs; + const ur_exp_command_buffer_update_value_arg_desc_t *ArgValueList = + pKernelLaunch->pArgValueList; + for (uint32_t i = 0; i < NumValueArgs; i++) { + const auto &ValueArgDesc = ArgValueList[i]; + uint32_t ArgIndex = ValueArgDesc.argIndex; + size_t ArgSize = ValueArgDesc.argSize; + const void *ArgValue = ValueArgDesc.pArgValue; + + ur_result_t Result = UR_RESULT_SUCCESS; + + try { + Kernel->setKernelArg(ArgIndex, ArgSize, ArgValue); + } catch (ur_result_t Err) { + Result = Err; + return Result; + } + } + + // Set the updated ND range + const uint32_t NewWorkDim = pKernelLaunch->workDim; + if (NewWorkDim != 0) { + UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + hCommand->WorkDim = NewWorkDim; + } + + if (pKernelLaunch->pGlobalWorkOffset) { + hCommand->SetGlobalOffset(pKernelLaunch->pGlobalWorkOffset); + } + + if (pKernelLaunch->pGlobalWorkSize) { + hCommand->SetGlobalSize(pKernelLaunch->pGlobalWorkSize); + } + + if (pKernelLaunch->pLocalWorkSize) { + hCommand->SetLocalSize(pKernelLaunch->pLocalWorkSize); + } + + size_t *GlobalWorkOffset = hCommand->GlobalWorkOffset; + size_t *GlobalWorkSize = hCommand->GlobalWorkSize; + size_t *LocalWorkSize = 
hCommand->LocalWorkSize; + uint32_t WorkDim = hCommand->WorkDim; + + // Set the number of threads per block to the number of threads per warp + // by default unless user has provided a better number + size_t ThreadsPerBlock[3] = {32u, 1u, 1u}; + size_t BlocksPerGrid[3] = {1u, 1u, 1u}; + CUfunction CuFunc = Kernel->get(); + ur_context_handle_t Context = hCommand->CommandBuffer->Context; + ur_device_handle_t Device = hCommand->CommandBuffer->Device; + auto Result = setKernelParams(Context, Device, WorkDim, GlobalWorkOffset, + GlobalWorkSize, LocalWorkSize, Kernel, CuFunc, + ThreadsPerBlock, BlocksPerGrid); + if (Result != UR_RESULT_SUCCESS) { + return Result; + } + + CUDA_KERNEL_NODE_PARAMS &Params = hCommand->Params; + + Params.func = CuFunc; + Params.gridDimX = BlocksPerGrid[0]; + Params.gridDimY = BlocksPerGrid[1]; + Params.gridDimZ = BlocksPerGrid[2]; + Params.blockDimX = ThreadsPerBlock[0]; + Params.blockDimY = ThreadsPerBlock[1]; + Params.blockDimZ = ThreadsPerBlock[2]; + Params.sharedMemBytes = Kernel->getLocalSize(); + Params.kernelParams = const_cast(Kernel->getArgIndices().data()); + + CUgraphNode Node = *(hCommand->Node); + CUgraphExec CudaGraphExec = hCommand->CommandBuffer->CudaGraphExec; + UR_CHECK_ERROR(cuGraphExecKernelNodeSetParams(CudaGraphExec, Node, &Params)); + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index 18264410c4..74825870ba 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -175,6 +175,70 @@ static inline const char *getUrResultString(ur_result_t Result) { fprintf(stderr, "UR <--- %s(%s)\n", #Call, getUrResultString(Result)); \ } +// Handle to a kernel command. +// +// Struct that stores all the information related to a kernel command in a +// command-buffer, such that the command can be recreated. When handles can +// be returned from other command types this struct will need refactored. 
+struct ur_exp_command_buffer_command_handle_t_ { + ur_exp_command_buffer_command_handle_t_( + ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, + std::shared_ptr Node, CUDA_KERNEL_NODE_PARAMS Params, + uint32_t WorkDim, const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize) + : CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), + Params(Params), WorkDim(WorkDim) { + const size_t CopySize = sizeof(size_t) * WorkDim; + std::memcpy(GlobalWorkOffset, pGlobalWorkOffset, CopySize); + std::memcpy(GlobalWorkSize, pGlobalWorkSize, CopySize); + std::memcpy(LocalWorkSize, pLocalWorkSize, CopySize); + + if (WorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - WorkDim); + std::memset(GlobalWorkOffset + WorkDim, 0, ZeroSize); + std::memset(GlobalWorkSize + WorkDim, 0, ZeroSize); + std::memset(LocalWorkSize + WorkDim, 0, ZeroSize); + } + } + + void SetGlobalOffset(const size_t *pGlobalWorkOffset) { + const size_t CopySize = sizeof(size_t) * WorkDim; + std::memcpy(GlobalWorkOffset, pGlobalWorkOffset, CopySize); + if (WorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - WorkDim); + std::memset(GlobalWorkOffset + WorkDim, 0, ZeroSize); + } + } + + void SetGlobalSize(const size_t *pGlobalWorkSize) { + const size_t CopySize = sizeof(size_t) * WorkDim; + std::memcpy(GlobalWorkSize, pGlobalWorkSize, CopySize); + if (WorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - WorkDim); + std::memset(GlobalWorkSize + WorkDim, 0, ZeroSize); + } + } + + void SetLocalSize(const size_t *pLocalWorkSize) { + const size_t CopySize = sizeof(size_t) * WorkDim; + std::memcpy(LocalWorkSize, pLocalWorkSize, CopySize); + if (WorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - WorkDim); + std::memset(LocalWorkSize + WorkDim, 0, ZeroSize); + } + } + + ur_exp_command_buffer_handle_t CommandBuffer; + ur_kernel_handle_t Kernel; + std::shared_ptr Node; + CUDA_KERNEL_NODE_PARAMS Params; + + uint32_t 
WorkDim; + size_t GlobalWorkOffset[3]; + size_t GlobalWorkSize[3]; + size_t LocalWorkSize[3]; +}; + struct ur_exp_command_buffer_handle_t_ { ur_exp_command_buffer_handle_t_(ur_context_handle_t hContext, @@ -202,6 +266,27 @@ struct ur_exp_command_buffer_handle_t_ { return SyncPoint; } + // Creates a UR command handle + // @param[in] Kernel UR kernel associated with this command. + // @param[in] Node CUDA Graph node associated with this command. + // @param[in] Params CUDA Kernel configuration associated with this node. + // @param[in] WorkDim Dimensions of the kernel execution. + // @param[in] GlobalWorkOffset Work item offset of the kernel execution. + // @param[in] GlobalWorkSize Global work size of the kernel execution. + // @param[in] LocalWorkSize Local work size of the kernel execution. + // @return Shared pointer to the created handle. + std::shared_ptr + AddCommandHandle(ur_kernel_handle_t Kernel, std::shared_ptr Node, + const CUDA_KERNEL_NODE_PARAMS &Params, uint32_t WorkDim, + const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, + const size_t *LocalWorkSize) { + + Handles.push_back(std::make_shared( + this, Kernel, Node, Params, WorkDim, GlobalWorkOffset, GlobalWorkSize, + LocalWorkSize)); + return Handles.back(); + } + // UR context associated with this command-buffer ur_context_handle_t Context; // Device associated with this command buffer @@ -222,6 +307,9 @@ struct ur_exp_command_buffer_handle_t_ { // is not enough) ur_exp_command_buffer_sync_point_t NextSyncPoint; + // List of command handles returned to the user. + std::vector> Handles; + // Used when retaining an object. uint32_t incrementReferenceCount() noexcept { return ++RefCount; } // Used when releasing an object. 
diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index acea59e1a1..004faad4ad 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1031,6 +1031,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + return ReturnValue(true); + default: break; } diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index f31ffe6d87..6a9d17c102 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -294,6 +294,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; return retVal; } diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 54a6fa2f4e..20a7f508fe 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -46,7 +46,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t, ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, const size_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -162,3 +163,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( 
"implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *) { + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for the HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index e40470f9aa..21e88c4d6a 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -837,6 +837,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_ASYNC_BARRIER: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + return ReturnValue(false); + default: break; } diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index 7707e78425..550818adb4 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -291,6 +291,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; return retVal; } diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index bbe49cb705..d1a7970bf5 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -509,7 +509,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *GlobalWorkSize, const size_t *LocalWorkSize, uint32_t 
NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *SyncPoint) { + ur_exp_command_buffer_sync_point_t *SyncPoint, + ur_exp_command_buffer_command_handle_t *) { // Lock automatically releases when this goes out of scope. std::scoped_lock Lock( Kernel->Mutex, Kernel->Program->Mutex); @@ -986,3 +987,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 0b8e12c67a..ef9f212847 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -825,6 +825,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ReturnValue(result); } + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + return ReturnValue(true); + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + return ReturnValue(false); + default: urPrint("Unsupported ParamName in urGetDeviceInfo\n"); urPrint("ParamName=%d(0x%x)\n", ParamName, ParamName); diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 74d0706b31..e6c5ef72eb 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -341,6 +341,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; return retVal; } diff --git 
a/source/adapters/native_cpu/command_buffer.cpp b/source/adapters/native_cpu/command_buffer.cpp index 50b38c9d52..6b7802041f 100644 --- a/source/adapters/native_cpu/command_buffer.cpp +++ b/source/adapters/native_cpu/command_buffer.cpp @@ -50,7 +50,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t, ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, const size_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for the NativeCPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -162,3 +163,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_sync_point_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index 3432ce780e..3a67b30c08 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -304,6 +304,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, CASE_UR_UNSUPPORTED(UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH); case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: return ReturnValue(false); + + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + return ReturnValue(false); + default: DIE_NO_IMPLEMENTATION; } diff --git a/source/adapters/native_cpu/ur_interface_loader.cpp b/source/adapters/native_cpu/ur_interface_loader.cpp index 9408101927..d884539d3d 100644 --- 
a/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/source/adapters/native_cpu/ur_interface_loader.cpp @@ -283,6 +283,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendMemBufferWriteRectExp = urCommandBufferAppendMemBufferWriteRectExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; return retVal; } diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index 464aa59d54..7d96140650 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -4656,12 +4656,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4685,7 +4685,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. 
) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4706,7 +4706,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4727,7 +4727,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4748,9 +4748,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -4761,8 +4761,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4773,9 +4775,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, pSyncPoint, phCommand); } else { // generic implementation + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + d_context.get()); + } } return result; @@ -4787,16 +4794,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4855,7 +4862,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -4865,8 +4872,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4890,18 +4897,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. 
+ size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4925,17 +4932,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4959,7 +4966,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -4976,8 +4983,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -5002,31 +5009,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. 
ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -5051,29 +5058,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -5200,15 +5207,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -5232,6 +5238,30 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. 
+ const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnUpdateKernelLaunchExp = + d_context.urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + if (nullptr != pfnUpdateKernelLaunchExp) { + result = pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + } else { + // generic implementation + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -5696,6 +5726,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnEnqueueExp = driver::urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = + driver::urCommandBufferUpdateKernelLaunchExp; + return result; } catch (...) 
{ return exceptionToResult(std::current_exception()); diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 74cdd8a03d..772dc8a4d3 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -104,7 +104,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_exp_command_buffer_sync_point_t *pSyncPoint, + ur_exp_command_buffer_command_handle_t *) { cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; @@ -356,3 +357,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand, + [[maybe_unused]] const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 27577eab39..c437dbc5c8 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -947,6 +947,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_ASYNC_BARRIER: { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } + + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + return ReturnValue(true); + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + return ReturnValue(false); + default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git 
a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index ac2c33475b..2ca3d561b6 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -301,6 +301,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; return retVal; } diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 5867d295ae..3612098726 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -5050,12 +5050,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. 
) { auto pfnCreateExp = context.urDdiTable.CommandBufferExp.pfnCreateExp; @@ -5082,7 +5082,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnRetainExp = context.urDdiTable.CommandBufferExp.pfnRetainExp; @@ -5107,7 +5107,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnReleaseExp = context.urDdiTable.CommandBufferExp.pfnReleaseExp; @@ -5132,7 +5132,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnFinalizeExp = context.urDdiTable.CommandBufferExp.pfnFinalizeExp; @@ -5158,9 +5158,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. 
+ uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -5171,8 +5171,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendKernelLaunchExp = context.urDdiTable.CommandBufferExp.pfnAppendKernelLaunchExp; @@ -5190,15 +5192,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( &pLocalWorkSize, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &pSyncPoint, + &phCommand}; uint64_t instance = context.notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP, "urCommandBufferAppendKernelLaunchExp", ¶ms); ur_result_t result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint); + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + phCommand); context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP, "urCommandBufferAppendKernelLaunchExp", ¶ms, &result, @@ -5211,16 +5214,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer 
object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendUSMMemcpyExp = context.urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -5294,7 +5297,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -5304,8 +5307,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { auto pfnAppendMemBufferCopyExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -5343,18 +5346,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -5391,17 +5394,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. 
- ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -5438,7 +5441,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -5455,8 +5458,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferCopyRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; @@ -5500,31 +5503,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. 
size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteRectExp; @@ -5568,29 +5571,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. 
+ bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; @@ -5774,15 +5777,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -5809,6 +5811,37 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. 
+) { + auto pfnUpdateKernelLaunchExp = + context.urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_update_kernel_launch_exp_params_t params = { + &hCommand, &pUpdateKernelLaunch}; + uint64_t instance = context.notify_begin( + UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP, + "urCommandBufferUpdateKernelLaunchExp", ¶ms); + + ur_result_t result = + pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP, + "urCommandBufferUpdateKernelLaunchExp", ¶ms, &result, + instance); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -6389,6 +6422,10 @@ __urdlllocal ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_tracing_layer::urCommandBufferEnqueueExp; + dditable.pfnUpdateKernelLaunchExp = pDdiTable->pfnUpdateKernelLaunchExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur_tracing_layer::urCommandBufferUpdateKernelLaunchExp; + return result; } /////////////////////////////////////////////////////////////////////////////// diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index db59ca3b11..50e1d4b7e1 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -6753,12 +6753,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, 
///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) { auto pfnCreateExp = context.urDdiTable.CommandBufferExp.pfnCreateExp; @@ -6790,7 +6790,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnRetainExp = context.urDdiTable.CommandBufferExp.pfnRetainExp; @@ -6817,7 +6817,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnReleaseExp = context.urDdiTable.CommandBufferExp.pfnReleaseExp; @@ -6844,7 +6844,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. 
) { auto pfnFinalizeExp = context.urDdiTable.CommandBufferExp.pfnFinalizeExp; @@ -6867,9 +6867,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -6880,8 +6880,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { auto pfnAppendKernelLaunchExp = context.urDdiTable.CommandBufferExp.pfnAppendKernelLaunchExp; @@ -6922,8 +6924,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_result_t result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint); + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + phCommand); return result; } @@ -6932,16 +6934,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendUSMMemcpyExp = context.urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -7056,7 +7058,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. 
+ hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -7066,8 +7068,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferCopyExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -7109,18 +7111,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -7162,17 +7164,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { auto pfnAppendMemBufferReadExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -7214,7 +7216,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -7231,8 +7233,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferCopyRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; @@ -7275,31 +7277,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. 
ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { auto pfnAppendMemBufferWriteRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteRectExp; @@ -7342,29 +7344,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; @@ -7571,15 +7573,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. 
@@ -7622,6 +7623,37 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +) { + auto pfnUpdateKernelLaunchExp = + context.urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommand) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pUpdateKernelLaunch) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + } + + ur_result_t result = + pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -8264,6 +8296,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_validation_layer::urCommandBufferEnqueueExp; + dditable.pfnUpdateKernelLaunchExp = pDdiTable->pfnUpdateKernelLaunchExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur_validation_layer::urCommandBufferUpdateKernelLaunchExp; + return result; } diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index a3a4ccaaa0..0a63c9c35e 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -32,6 +32,7 @@ ur_exp_image_mem_factory_t ur_exp_image_mem_factory; 
ur_exp_interop_mem_factory_t ur_exp_interop_mem_factory; ur_exp_interop_semaphore_factory_t ur_exp_interop_semaphore_factory; ur_exp_command_buffer_factory_t ur_exp_command_buffer_factory; +ur_exp_command_buffer_command_factory_t ur_exp_command_buffer_command_factory; /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urAdapterGet @@ -6502,12 +6503,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6548,7 +6549,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. 
) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6576,7 +6577,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6604,7 +6605,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6632,9 +6633,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -6645,8 +6646,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6669,10 +6672,26 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( hKernel = reinterpret_cast(hKernel)->handle; // forward to device-platform - result = pfnAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, - pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + result = pfnAppendKernelLaunchExp( + hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + phCommand); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + ur_exp_command_buffer_command_factory.getInstance( + *phCommand, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } return result; } @@ -6681,16 +6700,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. 
uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6763,7 +6782,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -6773,8 +6792,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6811,18 +6830,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6856,17 +6875,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. 
- size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6900,7 +6919,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -6917,8 +6936,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6956,31 +6975,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. 
uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7015,29 +7034,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. 
size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7198,15 +7217,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -7259,6 +7277,59 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommand) + ->dditable; + auto pfnUpdateKernelLaunchExp = + dditable->ur.CommandBufferExp.pfnUpdateKernelLaunchExp; + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommand = + reinterpret_cast(hCommand) + ->handle; + + // Deal with any struct parameters that have handle members we need to convert. 
+ auto pUpdateKernelLaunchLocal = *pUpdateKernelLaunch; + + std::vector + pUpdateKernelLaunchpArgMemobjList; + for (uint32_t i = 0; i < pUpdateKernelLaunch->numMemobjArgs; i++) { + ur_exp_command_buffer_update_memobj_arg_desc_t NewRangeStruct = + pUpdateKernelLaunchLocal.pArgMemobjList[i]; + if (NewRangeStruct.hArgValue) { + NewRangeStruct.hArgValue = + reinterpret_cast(NewRangeStruct.hArgValue) + ->handle; + } + + pUpdateKernelLaunchpArgMemobjList.push_back(NewRangeStruct); + } + pUpdateKernelLaunchLocal.pArgMemobjList = + pUpdateKernelLaunchpArgMemobjList.data(); + + // Now that we've converted all the members update the param pointers + pUpdateKernelLaunch = &pUpdateKernelLaunchLocal; + + // forward to device-platform + result = pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -7874,6 +7945,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = ur_loader::urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = ur_loader::urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur_loader::urCommandBufferUpdateKernelLaunchExp; } else { // return pointers directly to platform's DDIs *pDdiTable = ur_loader::context->platforms.front() diff --git a/source/loader/ur_ldrddi.hpp b/source/loader/ur_ldrddi.hpp index 4edbabbd8b..d98b99a655 100644 --- a/source/loader/ur_ldrddi.hpp +++ b/source/loader/ur_ldrddi.hpp @@ -92,6 +92,12 @@ using ur_exp_command_buffer_factory_t = singleton_factory_t; +using ur_exp_command_buffer_command_object_t = + object_t; +using ur_exp_command_buffer_command_factory_t = + singleton_factory_t; + } // namespace ur_loader #endif /* UR_LOADER_LDRDDI_H */ diff --git a/source/loader/ur_libapi.cpp 
b/source/loader/ur_libapi.cpp index cd4a70c91e..d3bc064240 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7016,7 +7016,7 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// @brief Create a Command-Buffer object /// /// @details -/// - Create a command-buffer object +/// - Create a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7033,12 +7033,12 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) try { auto pfnCreateExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnCreateExp; @@ -7066,7 +7066,7 @@ ur_result_t UR_APICALL urCommandBufferCreateExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. 
) try { auto pfnRetainExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnRetainExp; @@ -7095,7 +7095,7 @@ ur_result_t UR_APICALL urCommandBufferRetainExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { auto pfnReleaseExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnReleaseExp; @@ -7124,7 +7124,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { auto pfnFinalizeExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnFinalizeExp; @@ -7138,7 +7138,7 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a kernel execution command to a command-buffer object +/// @brief Append a kernel execution command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7165,9 +7165,9 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. 
const size_t * @@ -7178,8 +7178,10 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { auto pfnAppendKernelLaunchExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendKernelLaunchExp; @@ -7190,13 +7192,13 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return pfnAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, pSyncPoint, phCommand); } catch (...) { return exceptionToResult(std::current_exception()); } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM memcpy command to a command-buffer object +/// @brief Append a USM memcpy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7221,16 +7223,16 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. 
uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendUSMMemcpyExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -7246,7 +7248,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM fill command to a command-buffer object +/// @brief Append a USM fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7301,7 +7303,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory copy command to a command-buffer object +/// @brief Append a memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7322,7 +7324,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -7332,8 +7334,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferCopyExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -7349,7 +7351,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory write command to a command-buffer object +/// @brief Append a memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7371,18 +7373,18 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferWriteExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -7398,7 +7400,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory read command to a command-buffer object +/// @brief Append a memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7420,17 +7422,17 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferReadExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -7446,7 +7448,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory copy command to a command-buffer object +/// @brief Append a rectangular memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7467,7 +7469,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -7484,8 +7486,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) try { auto pfnAppendMemBufferCopyRectExp = ur_lib::context->urDdiTable.CommandBufferExp @@ -7503,7 +7505,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory write command to a command-buffer object +/// @brief Append a rectangular memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7525,31 +7527,31 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. 
void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferWriteRectExp = ur_lib::context->urDdiTable.CommandBufferExp @@ -7567,7 +7569,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory read command to a command-buffer object +/// @brief Append a rectangular memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7589,29 +7591,29 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. 
size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferReadRectExp = ur_lib::context->urDdiTable.CommandBufferExp @@ -7629,7 +7631,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory fill command to a command-buffer object +/// @brief Append a memory fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7681,7 +7683,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Prefetch command to a command-buffer object +/// @brief Append a USM Prefetch command to a command-buffer object. 
/// /// @details /// - Prefetching may not be supported for all devices or allocation types. @@ -7737,7 +7739,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Advise command to a command-buffer object +/// @brief Append a USM Advise command to a command-buffer object. /// /// @details /// - Not all memory advice hints may be supported for all devices or @@ -7814,15 +7816,14 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -7839,6 +7840,49 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Update a kernel launch command. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pUpdateKernelLaunch` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If update functionality is not supported by the device. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. + ) try { + auto pfnUpdateKernelLaunchExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to execute a cooperative kernel /// diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 26f24aba08..3b63bde7c6 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -5952,7 +5952,7 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// @brief Create a Command-Buffer object /// /// @details -/// - Create a command-buffer object +/// - Create a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -5969,12 +5969,12 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -5995,7 +5995,7 @@ ur_result_t UR_APICALL urCommandBufferCreateExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. 
) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6017,7 +6017,7 @@ ur_result_t UR_APICALL urCommandBufferRetainExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6039,14 +6039,14 @@ ur_result_t UR_APICALL urCommandBufferReleaseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a kernel execution command to a command-buffer object +/// @brief Append a kernel execution command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6073,9 +6073,9 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -6086,15 +6086,17 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM memcpy command to a command-buffer object +/// @brief Append a USM memcpy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6119,23 +6121,23 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM fill command to a command-buffer object +/// @brief Append a USM fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6181,7 +6183,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory copy command to a command-buffer object +/// @brief Append a memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6202,7 +6204,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -6212,15 +6214,15 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory write command to a command-buffer object +/// @brief Append a memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6242,25 +6244,25 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory read command to a command-buffer object +/// @brief Append a memory read command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6282,24 +6284,24 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory copy command to a command-buffer object +/// @brief Append a rectangular memory copy command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6320,7 +6322,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -6337,15 +6339,15 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory write command to a command-buffer object +/// @brief Append a rectangular memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6367,38 +6369,38 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. 
ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory read command to a command-buffer object +/// @brief Append a rectangular memory read command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6420,36 +6422,36 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory fill command to a command-buffer object +/// @brief Append a memory fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6492,7 +6494,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Prefetch command to a command-buffer object +/// @brief Append a USM Prefetch command to a command-buffer object. /// /// @details /// - Prefetching may not be supported for all devices or allocation types. @@ -6539,7 +6541,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Advise command to a command-buffer object +/// @brief Append a USM Advise command to a command-buffer object. /// /// @details /// - Not all memory advice hints may be supported for all devices or @@ -6607,15 +6609,14 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -6624,6 +6625,42 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Update a kernel launch command. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pUpdateKernelLaunch` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If update functionality is not supported by the device. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. 
+ const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to execute a cooperative kernel /// diff --git a/test/conformance/CMakeLists.txt b/test/conformance/CMakeLists.txt index 032edefe5a..f6f6a31a6c 100644 --- a/test/conformance/CMakeLists.txt +++ b/test/conformance/CMakeLists.txt @@ -120,6 +120,7 @@ if(UR_DPCXX) add_subdirectory(kernel) add_subdirectory(program) add_subdirectory(enqueue) + add_subdirectory(exp_command_buffer) else() message(WARNING "UR_DPCXX is not defined, the following conformance test executables \ diff --git a/test/conformance/device_code/CMakeLists.txt b/test/conformance/device_code/CMakeLists.txt index 202109c3b5..450733d5ed 100644 --- a/test/conformance/device_code/CMakeLists.txt +++ b/test/conformance/device_code/CMakeLists.txt @@ -94,6 +94,9 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/image_copy.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/mean.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/spec_constant.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/usm_ll.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy_usm.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/indexers_usm.cpp) set(KERNEL_HEADER ${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/kernel_entry_points.h) add_custom_command(OUTPUT ${KERNEL_HEADER} diff --git a/test/conformance/device_code/indexers_usm.cpp b/test/conformance/device_code/indexers_usm.cpp new file mode 100644 index 0000000000..ee64fb39e0 --- /dev/null +++ b/test/conformance/device_code/indexers_usm.cpp @@ -0,0 +1,38 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +int main() { + const cl::sycl::range<3> global_range(8, 8, 8); + const cl::sycl::range<3> local_range(2, 2, 2); + const cl::sycl::id<3> global_offset(4, 4, 4); + const cl::sycl::nd_range<3> nd_range(global_range, local_range, + global_offset); + + cl::sycl::queue sycl_queue; + const size_t elements_per_work_item = 6; + int *ptr = cl::sycl::malloc_shared(global_range[0] * global_range[1] * + global_range[2] * + elements_per_work_item, + sycl_queue); + + sycl_queue.submit([&](cl::sycl::handler &cgh) { + cgh.parallel_for( + nd_range, [ptr](cl::sycl::nd_item<3> index) { + int *wi_ptr = + ptr + index.get_global_linear_id() * elements_per_work_item; + + wi_ptr[0] = index.get_global_id(0); + wi_ptr[1] = index.get_global_id(1); + wi_ptr[2] = index.get_global_id(2); + + wi_ptr[3] = index.get_local_id(0); + wi_ptr[4] = index.get_local_id(1); + wi_ptr[5] = index.get_local_id(2); + }); + }); + return 0; +} diff --git a/test/conformance/device_code/saxpy.cpp b/test/conformance/device_code/saxpy.cpp new file mode 100644 index 0000000000..83ae11cb0a --- /dev/null +++ b/test/conformance/device_code/saxpy.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +int main() { + size_t array_size = 16; + std::vector X(array_size, 1); + std::vector Y(array_size, 2); + std::vector Z(array_size, 0); + uint32_t A = 42; + auto x_buff = + cl::sycl::buffer(X.data(), cl::sycl::range<1>(array_size)); + auto y_buff = + cl::sycl::buffer(Y.data(), cl::sycl::range<1>(array_size)); + auto z_buff = + cl::sycl::buffer(Z.data(), cl::sycl::range<1>(array_size)); + + cl::sycl::queue sycl_queue; + sycl_queue.submit([&](cl::sycl::handler &cgh) { + auto x_acc = x_buff.get_access(cgh); + auto y_acc = y_buff.get_access(cgh); + auto z_acc = z_buff.get_access(cgh); + cgh.parallel_for(cl::sycl::range<1>{array_size}, + [=](cl::sycl::item<1> itemId) { + auto i = itemId.get_id(0); + z_acc[i] = A * x_acc[i] + y_acc[i]; + }); + }); + return 0; +} diff --git a/test/conformance/device_code/saxpy_usm.cpp b/test/conformance/device_code/saxpy_usm.cpp new file mode 100644 index 0000000000..6d93fd2f8b --- /dev/null +++ b/test/conformance/device_code/saxpy_usm.cpp @@ -0,0 +1,25 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +int main() { + size_t array_size = 16; + + cl::sycl::queue sycl_queue; + uint32_t *X = cl::sycl::malloc_shared(array_size, sycl_queue); + uint32_t *Y = cl::sycl::malloc_shared(array_size, sycl_queue); + uint32_t *Z = cl::sycl::malloc_shared(array_size, sycl_queue); + uint32_t A = 42; + + sycl_queue.submit([&](cl::sycl::handler &cgh) { + cgh.parallel_for(cl::sycl::range<1>{array_size}, + [=](cl::sycl::item<1> itemId) { + auto i = itemId.get_id(0); + Z[i] = A * X[i] + Y[i]; + }); + }); + return 0; +} diff --git a/test/conformance/exp_command_buffer/CMakeLists.txt b/test/conformance/exp_command_buffer/CMakeLists.txt new file mode 100644 index 0000000000..1a948803ee --- /dev/null +++ b/test/conformance/exp_command_buffer/CMakeLists.txt @@ -0,0 +1,12 @@ +# Copyright (C) 2023 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +add_conformance_test_with_kernels_environment(exp_command_buffer + buffer_fill_kernel_update.cpp + usm_fill_kernel_update.cpp + buffer_saxpy_kernel_update.cpp + usm_saxpy_kernel_update.cpp + ndrange_update.cpp +) diff --git a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp new file mode 100644 index 0000000000..196552b944 --- /dev/null +++ b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp @@ -0,0 +1,397 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +// Test that updating a command-buffer with a single kernel command +// taking buffer & scalar arguments works correctly.
+struct BufferFillCommandTest : uur::urExpUpdatableCommandBufferTests { + void SetUp() override { + program_name = "fill"; + UUR_RETURN_ON_FATAL_FAILURE(urExpUpdatableCommandBufferTests::SetUp()); + + // First argument is buffer to fill (will also be hidden accessor arg) + AddBuffer1DArg(sizeof(val) * global_size, &buffer); + // Second argument is scalar to fill with. + AddPodArg(val); + + // Append kernel command to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void TearDown() override { + if (new_buffer) { + EXPECT_SUCCESS(urMemRelease(new_buffer)); + } + UUR_RETURN_ON_FATAL_FAILURE( + urExpUpdatableCommandBufferTests::TearDown()); + } + + static constexpr uint32_t val = 42; + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr size_t buffer_size = sizeof(val) * global_size; + ur_mem_handle_t buffer = nullptr; + ur_mem_handle_t new_buffer = nullptr; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(BufferFillCommandTest); + +// Update kernel arguments to fill with a new scalar value to a new output +// buffer. 
+TEST_P(BufferFillCommandTest, UpdateParameters) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, buffer_size, val); + + // Create a new buffer to update kernel output parameter to + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + buffer_size, nullptr, &new_buffer)); + char zero = 0; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, new_buffer, &zero, + sizeof(zero), 0, buffer_size, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Set argument index zero as new buffer + ur_exp_command_buffer_update_memobj_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + new_buffer, // hArgValue + }; + + // Set argument index 2 as new value to fill (index 1 is buffer accessor) + uint32_t new_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(new_val), // argSize + nullptr, // pProperties + &new_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 1, // numMemobjArgs + 0, // numPointerArgs + 1, // numValueArgs + 0, // numExecInfos + 0, // workDim + &new_output_desc, // pArgMemobjList + nullptr, // pArgPointerList + &new_input_desc, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle,
queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + ValidateBuffer(new_buffer, buffer_size, new_val); +} + +// Test updating the global size so that the fill outputs to a larger buffer +TEST_P(BufferFillCommandTest, UpdateGlobalSize) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(val) * global_size, val); + + size_t new_global_size = 64; + const size_t buffer_size = sizeof(val) * new_global_size; + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + buffer_size, nullptr, &new_buffer)); + char zero = 0; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, new_buffer, &zero, + sizeof(zero), 0, buffer_size, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ur_exp_command_buffer_update_memobj_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + new_buffer, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 1, // numMemobjArgs + 0, // numPointerArgs + 0, // numValueArgs + 0, // numExecInfos + 0, // workDim + &new_output_desc, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + &new_global_size, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ValidateBuffer(new_buffer, buffer_size, val); +} + +// Test updating the input & output kernel arguments and global +// size, by calling update 
individually for each of these configurations. +TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(val) * global_size, val); + + size_t new_global_size = 64; + const size_t buffer_size = sizeof(val) * new_global_size; + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + buffer_size, nullptr, &new_buffer)); + char zero = 0; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, new_buffer, &zero, + sizeof(zero), 0, buffer_size, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ur_exp_command_buffer_update_memobj_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + new_buffer, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t output_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 1, // numMemobjArgs + 0, // numPointerArgs + 0, // numValueArgs + 0, // numExecInfos + 0, // workDim + &new_output_desc, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &output_update_desc)); + + uint32_t new_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(new_val), // argSize + nullptr, // pProperties + &new_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t input_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // 
numPointerArgs + 1, // numValueArgs + 0, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + &new_input_desc, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &input_update_desc)); + + ur_exp_command_buffer_update_kernel_launch_desc_t global_size_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 0, // numValueArgs + 0, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + &new_global_size, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp( + command_handle, &global_size_update_desc)); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ValidateBuffer(new_buffer, buffer_size, new_val); +} + +// Test calling update twice on the same command-handle updating the +// input value, and verifying that it's the second call which persists. 
+TEST_P(BufferFillCommandTest, OverrideUpdate) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(val) * global_size, val); + + uint32_t first_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t first_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(first_val), // argSize + nullptr, // pProperties + &first_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t first_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 1, // numValueArgs + 0, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + &first_input_desc, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &first_update_desc)); + + uint32_t second_val = -99; + ur_exp_command_buffer_update_value_arg_desc_t second_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(second_val), // argSize + nullptr, // pProperties + &second_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t second_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 1, // numValueArgs + 0, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + &second_input_desc, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + 
ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &second_update_desc)); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ValidateBuffer(buffer, sizeof(val) * global_size, second_val); +} + +// Test calling update with multiple ur_exp_command_buffer_update_value_arg_desc_t +// instances updating the same argument, and checking that the last one in the +// list persists. +TEST_P(BufferFillCommandTest, OverrideArgList) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(val) * global_size, val); + + ur_exp_command_buffer_update_value_arg_desc_t input_descs[2]; + uint32_t first_val = 33; + input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(first_val), // argSize + nullptr, // pProperties + &first_val, // hArgValue + }; + + uint32_t second_val = -99; + input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(second_val), // argSize + nullptr, // pProperties + &second_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t second_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 2, // numValueArgs + 0, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + input_descs, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &second_update_desc)); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + 
ASSERT_SUCCESS(urQueueFinish(queue)); + + ValidateBuffer(buffer, sizeof(val) * global_size, second_val); +} diff --git a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp new file mode 100644 index 0000000000..09049337d6 --- /dev/null +++ b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp @@ -0,0 +1,172 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +// Test that updating a command-buffer with a single kernel command +// taking buffer & scalar arguments works correctly. +struct BufferSaxpyKernelTests : uur::urExpUpdatableCommandBufferTests { + void SetUp() override { + program_name = "saxpy"; + UUR_RETURN_ON_FATAL_FAILURE(urExpUpdatableCommandBufferTests::SetUp()); + + const size_t allocation_size = sizeof(uint32_t) * global_size; + for (auto &buffer : buffers) { + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + allocation_size, nullptr, + &buffer)); + ASSERT_NE(buffer, nullptr); + + std::vector init(allocation_size); + uur::generateMemFillPattern(init); + + ASSERT_SUCCESS(urEnqueueMemBufferWrite(queue, buffer, true, 0, + allocation_size, init.data(), + 0, nullptr, nullptr)); + } + + // Index 0 is output buffer + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 0, nullptr, buffers[0])); + // Index 1 is output accessor + struct { + size_t offsets[1] = {0}; + } accessor; + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(accessor), nullptr, + &accessor)); + + // Index 2 is A + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 2, sizeof(A), nullptr, &A)); + // Index 3 is X buffer + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 3, nullptr, buffers[1])); + + // Index 4 is X buffer accessor + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 4, sizeof(accessor), nullptr, + &accessor)); 
+ // Index 5 is Y buffer + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 5, nullptr, buffers[2])); + + // Index 6 is Y buffer accessor + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 6, sizeof(accessor), nullptr, + &accessor)); + + // Append kernel command to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + // Check output == A * X + Y element-wise over `length` values (assumed uint32_t). + void Validate(ur_mem_handle_t output, ur_mem_handle_t X, ur_mem_handle_t Y, + uint32_t A, size_t length) { + const size_t bytes = length * sizeof(uint32_t); // reads are byte-sized + std::vector output_data(length, 0); + ASSERT_SUCCESS(urEnqueueMemBufferRead(queue, output, true, 0, bytes, + output_data.data(), 0, nullptr, + nullptr)); + + std::vector X_data(length, 0); + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, X, true, 0, bytes, X_data.data(), 0, nullptr, nullptr)); + + std::vector Y_data(length, 0); + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, Y, true, 0, bytes, Y_data.data(), 0, nullptr, nullptr)); + + for (size_t i = 0; i < length; i++) { + uint32_t result = A * X_data[i] + Y_data[i]; + ASSERT_EQ(result, output_data[i]); + } + } + + void TearDown() override { + for (auto &buffer : buffers) { + if (buffer) { + EXPECT_SUCCESS(urMemRelease(buffer)); + } + } + + UUR_RETURN_ON_FATAL_FAILURE( + urExpUpdatableCommandBufferTests::TearDown()); + } + + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr uint32_t A = 42; + std::array buffers = {nullptr, nullptr, nullptr, + nullptr}; // NOTE(review): UpdateParameters reads buffers[4]; confirm the array has 5 slots + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(BufferSaxpyKernelTests); + +TEST_P(BufferSaxpyKernelTests, UpdateParameters) { + // Run command-buffer prior
to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate(buffers[0], buffers[1], buffers[2], A, global_size); + + ur_exp_command_buffer_update_memobj_arg_desc_t new_input_descs[2]; + // New X at index 3 + new_input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 3, // argIndex + nullptr, // pProperties + buffers[3], // hArgValue + }; + + // New Y at index 5 + new_input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 5, // argIndex + nullptr, // pProperties + buffers[4], // hArgValue + }; + + // A at index 2 + uint32_t new_A = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(new_A), // argSize + nullptr, // pProperties + &new_A, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 2, // numMemobjArgs + 0, // numPointerArgs + 1, // numValueArgs + 0, // numExecInfos + 0, // workDim + new_input_descs, // pArgMemobjList + nullptr, // pArgPointerList + &new_A_desc, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + Validate(buffers[0], buffers[3], buffers[4], new_A, global_size); +} diff --git
a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match new file mode 100644 index 0000000000..6ebcfb6d7d --- /dev/null +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match @@ -0,0 +1,13 @@ +{{OPT}}BufferFillCommandTest.UpdateParameters/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferFillCommandTest.UpdateGlobalSize/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferFillCommandTest.SeparateUpdateCalls/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferFillCommandTest.OverrideUpdate/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferFillCommandTest.OverrideArgList/AMD_HIP_BACKEND{{.*}} +{{OPT}}USMFillCommandTest.UpdateParameters/AMD_HIP_BACKEND{{.*}} +{{OPT}}USMFillCommandTest.UpdateExecInfo/AMD_HIP_BACKEND{{.*}} +{{OPT}}USMMultipleFillCommandTest.UpdateAllKernels/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferSaxpyKernelTests.UpdateParameters/AMD_HIP_BACKEND{{.*}} +{{OPT}}USMSaxpyKernelTests.UpdateParameters/AMD_HIP_BACKEND{{.*}} +{{OPT}}NDRangeUpdateTests.Update3D/AMD_HIP_BACKEND{{.*}} +{{OPT}}NDRangeUpdateTests.Update2D/AMD_HIP_BACKEND{{.*}} +{{OPT}}NDRangeUpdateTests.Update1D/AMD_HIP_BACKEND{{.*}} diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_opencl.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_opencl.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/exp_command_buffer/ndrange_update.cpp b/test/conformance/exp_command_buffer/ndrange_update.cpp new file mode 100644 index 0000000000..5ad98fc9d8 
--- /dev/null +++ b/test/conformance/exp_command_buffer/ndrange_update.cpp @@ -0,0 +1,249 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +// Test updating a command-buffer containing a single kernel command +// in a way that changes the NDRange configuration. +struct NDRangeUpdateTests : uur::urExpUpdatableCommandBufferTests { + void SetUp() override { + program_name = "indexers_usm"; + UUR_RETURN_ON_FATAL_FAILURE(urExpUpdatableCommandBufferTests::SetUp()); + + ur_device_usm_access_capability_flags_t shared_usm_flags; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Shared USM is not supported."; + } + + // Allocate a zero-initialized USM pointer for use as kernel output at index 0 + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); + std::memset(shared_ptr, 0, allocation_size); + + ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, &shared_ptr)); + + // Add a 3-dimensional kernel command to the command-buffer, keep its + // command handle for later updates, then finalize the command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, + global_offset.data(), global_size.data(), local_size.data(), 0, + nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + // For each work-item the kernel writes the global id and local id in each + // of the 3 dimensions to an offset in the output based on global linear + // id. 
+ void Validate(std::array global_size, + std::array local_size, + std::array global_offset) { + // DPC++ swaps the X & Z dimension for 3 Dimensional kernels + // between those set by user and SPIR-V builtins. + // See `ReverseRangeDimensionsForKernel()` in commands.cpp + + std::swap(global_size[0], global_size[2]); + std::swap(local_size[0], local_size[2]); + std::swap(global_offset[0], global_offset[2]); + + // Verify global ID and local ID of each work item; slots are row-major in (x, y, z) + for (size_t x = 0; x < global_size[0]; x++) { + for (size_t y = 0; y < global_size[1]; y++) { + for (size_t z = 0; z < global_size[2]; z++) { + const size_t global_linear_id = + z + (y * global_size[2]) + + (x * global_size[1] * global_size[2]); + int *wi_ptr = (int *)shared_ptr + + (elements_per_id * global_linear_id); + + const int global_id_x = wi_ptr[0]; + const int global_id_y = wi_ptr[1]; + const int global_id_z = wi_ptr[2]; + +#if 0 + std::cout << "("<< x <<"," << y << "," << z << ") --> "; + std::cout << global_linear_id << " = "; + std::cout << "("<< global_id_x <<"," << global_id_y << "," + << global_id_z << ")\n"; +#endif + + EXPECT_EQ(global_id_x, x + global_offset[0]); + EXPECT_EQ(global_id_y, y + global_offset[1]); + EXPECT_EQ(global_id_z, z + global_offset[2]); + + const int local_id_x = wi_ptr[3]; + const int local_id_y = wi_ptr[4]; + const int local_id_z = wi_ptr[5]; + + EXPECT_EQ(local_id_x, x % local_size[0]); + EXPECT_EQ(local_id_y, y % local_size[1]); + EXPECT_EQ(local_id_z, z % local_size[2]); + } + } + } + } + + void TearDown() override { + if (shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urExpUpdatableCommandBufferTests::TearDown()); + } + + static constexpr size_t elements_per_id = 6; + static constexpr size_t n_dimensions = 3; + static constexpr std::array global_size = {8, 8, 8}; + static constexpr std::array local_size = {1, 2, 2}; + static constexpr std::array global_offset = {0, 4, 4}; + static constexpr size_t 
allocation_size = sizeof(int) * elements_per_id * + global_size[0] * global_size[1] * + global_size[2]; + void *shared_ptr = nullptr; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(NDRangeUpdateTests); + +// Keep the kernel work dimensions as 3, and update local size and global +// offset. +TEST_P(NDRangeUpdateTests, Update3D) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate(global_size, local_size, global_offset); + + // Set local size and global offset to update to + std::array new_local_size = {4, 2, 2}; + std::array new_global_offset = {3, 2, 1}; + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 0, // numValueArgs + 0, // numExecInfos + 3, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgValueList + nullptr, // pArgExecInfoList + new_global_offset.data(), // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + new_local_size.data(), // pLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly; global size was not updated, so the original is expected + Validate(global_size, new_local_size, new_global_offset); +} + +// Update the kernel work dimensions to 2, and update global size, local size, +// and global offset to new values. 
+TEST_P(NDRangeUpdateTests, Update2D) { + // Run command-buffer prior to update and verify output. NOTE(review): enqueue/finish is issued twice here; the second pair looks redundant - confirm. + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate(global_size, local_size, global_offset); + + // Set ND-Range configuration to update to + std::array new_global_size = {6, 6, 1}; + std::array new_local_size = {3, 3, 1}; + std::array new_global_offset = {3, 3, 0}; + + // Set dimensions as 2 + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 0, // numValueArgs + 0, // numExecInfos + 2, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgValueList + nullptr, // pArgExecInfoList + new_global_offset.data(), // pGlobalWorkOffset + new_global_size.data(), // pGlobalWorkSize + new_local_size.data(), // pLocalWorkSize + }; + + // Reset output to remove old values which will no longer have a + // work-item to overwrite them + std::memset(shared_ptr, 0, allocation_size); + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + Validate(new_global_size, new_local_size, new_global_offset); +} + +// Update the kernel work dimensions to 1, and check that previously +// set global size, local size, and global offset update accordingly. 
+TEST_P(NDRangeUpdateTests, Update1D) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate(global_size, local_size, global_offset); + + // Set dimensions to 1 + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 0, // numValueArgs + 0, // numExecInfos + 1, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + // Reset output to remove old values which will no longer have a + // work-item to overwrite them + std::memset(shared_ptr, 0, allocation_size); + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly; only dimension 0 of the original configuration is expected to remain + std::array new_global_size = {global_size[0], 1, 1}; + std::array new_local_size = {local_size[0], 1, 1}; + std::array new_global_offset = {global_offset[0], 0, 0}; + Validate(new_global_size, new_local_size, new_global_offset); +} diff --git a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp new file mode 100644 index 0000000000..8dbfba0418 --- /dev/null +++ b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp @@ -0,0 +1,370 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +// Test that updating a command-buffer with a single kernel command +// taking USM arguments works correctly. +struct USMFillCommandTest : uur::urExpUpdatableCommandBufferTests { + void SetUp() override { + program_name = "fill_usm"; + UUR_RETURN_ON_FATAL_FAILURE(urExpUpdatableCommandBufferTests::SetUp()); + + ur_device_usm_access_capability_flags_t shared_usm_flags; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Shared USM is not supported."; + } + + // Allocate USM pointer to fill + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); + std::memset(shared_ptr, 0, allocation_size); + + // Index 0 is output + ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, &shared_ptr)); + // Index 1 is input scalar + ASSERT_SUCCESS( + urKernelSetArgValue(kernel, 1, sizeof(val), nullptr, &val)); + + // Append kernel command to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void Validate(uint32_t *pointer, size_t length, uint32_t val) { + for (size_t i = 0; i < length; i++) { + ASSERT_EQ(pointer[i], val); + } + } + + void TearDown() override { + if (shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); + } + + if (new_shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, new_shared_ptr)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urExpUpdatableCommandBufferTests::TearDown()); + } + + static constexpr uint32_t val = 42; + static constexpr size_t local_size = 4; 
+ static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr size_t allocation_size = sizeof(val) * global_size; + void *shared_ptr = nullptr; + void *new_shared_ptr = nullptr; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMFillCommandTest); + +// Test using a different global size to fill and larger USM output buffer +TEST_P(USMFillCommandTest, UpdateParameters) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate((uint32_t *)shared_ptr, global_size, val); + + // Allocate a new USM pointer of larger size + size_t new_global_size = 64; + const size_t new_allocation_size = sizeof(val) * new_global_size; + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + new_allocation_size, &new_shared_ptr)); + ASSERT_NE(new_shared_ptr, nullptr); + std::memset(new_shared_ptr, 0, new_allocation_size); + + // Set new USM pointer as kernel output at index 0 + ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + &new_shared_ptr, // pArgValue + }; + + // Set new value to use for fill at kernel index 1 + uint32_t new_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(new_val), // argSize + nullptr, // pProperties + &new_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 1, // numPointerArgs + 1, // numValueArgs 
+ 0, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + &new_output_desc, // pArgPointerList + &new_input_desc, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + &new_global_size, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + Validate((uint32_t *)new_shared_ptr, new_global_size, new_val); +} + +// Test updating the kernel execution info +TEST_P(USMFillCommandTest, UpdateExecInfo) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate((uint32_t *)shared_ptr, global_size, val); + + ur_exp_command_buffer_update_exec_info_desc_t new_exec_info_descs[3]; + + // Update indirect access flag + bool indirect_access = false; + new_exec_info_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype + nullptr, // pNext + UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS, // propName + sizeof(indirect_access), // propSize + nullptr, // pProperties + &indirect_access, // pPropValue + }; + + // Update cache config + ur_kernel_cache_config_t cache_config = UR_KERNEL_CACHE_CONFIG_DEFAULT; + new_exec_info_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype + nullptr, // pNext + UR_KERNEL_EXEC_INFO_CACHE_CONFIG, // propName + sizeof(cache_config), // propSize + nullptr, // pProperties + &cache_config, // pPropValue + }; + + // Create a new USM allocation to set indirect access for + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &new_shared_ptr)); + 
ASSERT_NE(new_shared_ptr, nullptr); + void *pointers = {new_shared_ptr}; + new_exec_info_descs[2] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype + nullptr, // pNext + UR_KERNEL_EXEC_INFO_USM_PTRS, // propName + sizeof(pointers), // propSize + nullptr, // pProperties + &pointers, // pPropValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 0, // numValueArgs + 3, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgValueList + new_exec_info_descs, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify results are correct, although exec info modifications should + // have no effect on output + Validate((uint32_t *)shared_ptr, global_size, val); +} + +// Test updating a command-buffer with multiple USM fill kernel commands +struct USMMultipleFillCommandTest : uur::urExpUpdatableCommandBufferTests { + void SetUp() override { + program_name = "fill_usm"; + UUR_RETURN_ON_FATAL_FAILURE(urExpUpdatableCommandBufferTests::SetUp()); + + ur_device_usm_access_capability_flags_t shared_usm_flags; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Shared USM is not supported."; + } + + // Create a single USM allocation which will be used by all kernels + // by accessing at pointer offsets + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, 
nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); + std::memset(shared_ptr, 0, allocation_size); + + // Append multiple kernel commands to command-buffer + for (size_t k = 0; k < num_kernels; k++) { + // Calculate offset into output allocation, and set as + // kernel output. + void *offset_ptr = (uint32_t *)shared_ptr + (k * elements); + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 0, nullptr, &offset_ptr)); + + // Each kernel has a unique fill value + uint32_t fill_val = val + k; + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(fill_val), + nullptr, &fill_val)); + + // Append kernel and store returned handle + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &elements, &local_size, 0, nullptr, nullptr, + &command_handles[k])); + ASSERT_NE(command_handles[k], nullptr); + } + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void Validate(uint32_t *pointer, size_t length, uint32_t val) { + for (size_t i = 0; i < length; i++) { + ASSERT_EQ(pointer[i], val); + } + } + + void TearDown() override { + if (shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); + } + + if (new_shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, new_shared_ptr)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urExpUpdatableCommandBufferTests::TearDown()); + } + + static constexpr uint32_t val = 42; + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 64; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr size_t allocation_size = sizeof(val) * global_size; + static constexpr size_t num_kernels = 8; + static constexpr size_t elements = global_size / num_kernels; + + void *shared_ptr = nullptr; + void *new_shared_ptr = nullptr; + std::array + command_handles; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMMultipleFillCommandTest); + +// Test updating all the kernels commands 
in the command-buffer +TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + uint32_t *output = (uint32_t *)shared_ptr; + for (size_t i = 0; i < global_size; i++) { + const uint32_t expected = val + (i / elements); + ASSERT_EQ(expected, output[i]); + } + + // Create a new USM allocation to update kernel outputs to + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &new_shared_ptr)); + ASSERT_NE(new_shared_ptr, nullptr); + std::memset(new_shared_ptr, 0, allocation_size); + + // Update each kernel in the command-buffer. + uint32_t new_val = 33; + for (size_t k = 0; k < num_kernels; k++) { + // Update output pointer to an offset into new USM allocation + void *offset_ptr = (uint32_t *)new_shared_ptr + (k * elements); + ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + &offset_ptr, // pArgValue + }; + + // Update fill value; each kernel gets a unique value offset by its index + uint32_t new_fill_val = new_val + k; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(new_fill_val), // argSize + nullptr, // pProperties + &new_fill_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 1, // numPointerArgs + 1, // numValueArgs + 0, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + &new_output_desc, // pArgPointerList + &new_input_desc, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + 
nullptr, // pLocalWorkSize + }; + + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handles[k], + &update_desc)); + } + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + uint32_t *updated_output = (uint32_t *)new_shared_ptr; + for (size_t i = 0; i < global_size; i++) { + uint32_t expected = new_val + (i / elements); + ASSERT_EQ(expected, updated_output[i]) << i; + } +} diff --git a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp new file mode 100644 index 0000000000..d7f76cfc7f --- /dev/null +++ b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp @@ -0,0 +1,158 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +// Test that updating a command-buffer with a single kernel command +// taking USM & scalar arguments works correctly. 
+struct USMSaxpyKernelTests : uur::urExpUpdatableCommandBufferTests { + void SetUp() override { + program_name = "saxpy_usm"; + UUR_RETURN_ON_FATAL_FAILURE(urExpUpdatableCommandBufferTests::SetUp()); + + ur_device_usm_access_capability_flags_t shared_usm_flags; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Shared USM is not supported."; + } + + const size_t allocation_size = sizeof(uint32_t) * global_size; + for (auto &shared_ptr : shared_ptrs) { + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); + + std::vector pattern(allocation_size); + uur::generateMemFillPattern(pattern); + std::memcpy(shared_ptr, pattern.data(), allocation_size); + } + + // Index 0 is output + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 0, nullptr, &shared_ptrs[0])); + // Index 1 is A + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(A), nullptr, &A)); + // Index 2 is X + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 2, nullptr, &shared_ptrs[1])); + // Index 3 is Y + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 3, nullptr, &shared_ptrs[2])); + + // Append kernel command to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void Validate(uint32_t *output, uint32_t *X, uint32_t *Y, uint32_t A, + size_t length) { + for (size_t i = 0; i < length; i++) { + uint32_t result = A * X[i] + Y[i]; + ASSERT_EQ(result, output[i]); + } + } + + void TearDown() override { + for (auto &shared_ptr : shared_ptrs) { + if (shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); + } + 
} + + UUR_RETURN_ON_FATAL_FAILURE( + urExpUpdatableCommandBufferTests::TearDown()); + } + + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr uint32_t A = 42; + std::array shared_ptrs = {nullptr, nullptr, nullptr, nullptr}; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMSaxpyKernelTests); + +TEST_P(USMSaxpyKernelTests, UpdateParameters) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + uint32_t *output = (uint32_t *)shared_ptrs[0]; + uint32_t *X = (uint32_t *)shared_ptrs[1]; + uint32_t *Y = (uint32_t *)shared_ptrs[2]; + Validate(output, X, Y, A, global_size); + + // Update inputs + ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; + + // New X at index 2 + new_input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + nullptr, // pProperties + &shared_ptrs[3], // pArgValue + }; + + // New Y at index 3 + new_input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 3, // argIndex + nullptr, // pProperties + &shared_ptrs[4], // pArgValue + }; + + // New A at index 1 + uint32_t new_A = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(new_A), // argSize + nullptr, // pProperties + &new_A, // hArgValue + }; + + // Update kernel inputs + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 2, // numPointerArgs 
+ 1, // numValueArgs + 0, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + new_input_descs, // pArgPointerList + &new_A_desc, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + uint32_t *new_output = (uint32_t *)shared_ptrs[0]; + uint32_t *new_X = (uint32_t *)shared_ptrs[3]; + uint32_t *new_Y = (uint32_t *)shared_ptrs[4]; + Validate(new_output, new_X, new_Y, new_A, global_size); +} diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index cf01015eb4..178cb2a89b 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -1276,6 +1276,77 @@ struct urGlobalVariableTest : uur::urKernelExecutionTest { GlobalVar global_var; }; +struct urExpCommandBufferTest : urKernelExecutionTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp()); + + size_t returned_size; + ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0, + nullptr, &returned_size)); + + std::unique_ptr returned_extensions(new char[returned_size]); + + ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, + returned_size, returned_extensions.get(), + nullptr)); + + std::string_view extensions_string(returned_extensions.get()); + bool command_buffer_support = + extensions_string.find(UR_COMMAND_BUFFER_EXTENSION_STRING_EXP) != + std::string::npos; + + if (!command_buffer_support) { + GTEST_SKIP() << "EXP command-buffer feature is not supported."; + } + + ASSERT_SUCCESS(urDeviceGetInfo( + device, 
UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP, + sizeof(ur_bool_t), &updatable_command_buffer_support, nullptr)); + + // Create a command-buffer + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, + &cmd_buf_handle)); + ASSERT_NE(cmd_buf_handle, nullptr); + } + + void TearDown() override { + if (cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); + } + UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::TearDown()); + } + + ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; + ur_bool_t updatable_command_buffer_support = false; +}; + +struct urExpUpdatableCommandBufferTests : urExpCommandBufferTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urExpCommandBufferTest ::SetUp()); + + if (!updatable_command_buffer_support) { + GTEST_SKIP() << "Updating EXP command-buffers is not supported."; + } + + // Create a command-buffer with update enabled. + ur_exp_command_buffer_desc_t desc{ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true}; + + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc, + &updatable_cmd_buf_handle)); + ASSERT_NE(updatable_cmd_buf_handle, nullptr); + } + + void TearDown() override { + if (updatable_cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle)); + } + UUR_RETURN_ON_FATAL_FAILURE(urExpCommandBufferTest::TearDown()); + } + + ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr; +}; + } // namespace uur #endif // UR_CONFORMANCE_INCLUDE_FIXTURES_H_INCLUDED diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp index d9677c3eab..b959e565c4 100644 --- a/tools/urinfo/urinfo.hpp +++ b/tools/urinfo/urinfo.hpp @@ -323,6 +323,12 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_ESIMD_SUPPORT); std::cout << prefix; + printDeviceInfo(hDevice, + UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP); + std::cout << prefix; + printDeviceInfo( + hDevice, 
UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP); + std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP); std::cout << prefix;