From b8a674dbc0840ba4966268125163b03fbad87ba7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20B=C3=B6ck?= Date: Fri, 26 Jul 2024 19:43:12 +0200 Subject: [PATCH] Bump IREE version (#103) --- .../src/Quidditch/Target/ConvertToLLVM.cpp | 1 - iree | 2 +- .../Quidditch/command_buffer/command_buffer.c | 64 +++++++------------ runtime/runtime/src/Quidditch/device/device.c | 24 +++++-- 4 files changed, 42 insertions(+), 49 deletions(-) diff --git a/codegen/compiler/src/Quidditch/Target/ConvertToLLVM.cpp b/codegen/compiler/src/Quidditch/Target/ConvertToLLVM.cpp index ebc3b60..bdeba3e 100644 --- a/codegen/compiler/src/Quidditch/Target/ConvertToLLVM.cpp +++ b/codegen/compiler/src/Quidditch/Target/ConvertToLLVM.cpp @@ -1035,7 +1035,6 @@ void ConvertToLLVMPass::runOnOperation() { populateVectorToSCFConversionPatterns(patterns); populateVectorToLLVMMatrixConversionPatterns(typeConverter, patterns); populateVectorToLLVMConversionPatterns(typeConverter, patterns, false); - populateReconcileUnrealizedCastsPatterns(patterns); populateSnitchToLLVMConversionPatterns(module, typeConverter, patterns); HALDispatchABI abi(&typeConverter); diff --git a/iree b/iree index db7974c..456d80c 160000 --- a/iree +++ b/iree @@ -1 +1 @@ -Subproject commit db7974c26549e3700923244ac1847b032013f898 +Subproject commit 456d80c51930ccc03ce0488e98238e5e0a14b403 diff --git a/runtime/runtime/src/Quidditch/command_buffer/command_buffer.c b/runtime/runtime/src/Quidditch/command_buffer/command_buffer.c index c402c9b..ec65996 100644 --- a/runtime/runtime/src/Quidditch/command_buffer/command_buffer.c +++ b/runtime/runtime/src/Quidditch/command_buffer/command_buffer.c @@ -126,7 +126,9 @@ iree_status_t quidditch_command_buffer_initialize( memset(command_buffer, 0, sizeof(*command_buffer)); iree_hal_command_buffer_initialize( - device, mode, command_categories, queue_affinity, binding_capacity, + iree_hal_device_allocator(device), mode, command_categories, + queue_affinity, binding_capacity, + (uint8_t*)command_buffer + sizeof(*command_buffer), &quidditch_command_buffer_vtable, &command_buffer->base); command_buffer->host_allocator = host_allocator; quidditch_command_buffer_reset(command_buffer); @@ -301,7 +303,8 @@ static iree_status_t quidditch_command_buffer_wait_events( //===----------------------------------------------------------------------===// static iree_status_t quidditch_command_buffer_discard_buffer( - iree_hal_command_buffer_t* base_command_buffer, iree_hal_buffer_t* buffer) { + iree_hal_command_buffer_t* base_command_buffer, + iree_hal_buffer_ref_t buffer) { // Could be treated as a cache invalidation as it indicates we won't be using // the existing buffer contents again. return iree_ok_status(); @@ -313,10 +316,10 @@ static iree_status_t quidditch_command_buffer_discard_buffer( static iree_status_t quidditch_command_buffer_fill_buffer( iree_hal_command_buffer_t* base_command_buffer, - iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset, - iree_device_size_t length, const void* pattern, + iree_hal_buffer_ref_t target_buffer, const void* pattern, iree_host_size_t pattern_length) { - return iree_hal_buffer_map_fill(target_buffer, target_offset, length, pattern, + return iree_hal_buffer_map_fill(target_buffer.buffer, target_buffer.offset, + target_buffer.length, pattern, pattern_length); } @@ -326,11 +329,10 @@ static iree_status_t quidditch_command_buffer_fill_buffer( static iree_status_t quidditch_command_buffer_update_buffer( iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer, - iree_host_size_t source_offset, iree_hal_buffer_t* target_buffer, - iree_device_size_t target_offset, iree_device_size_t length) { + iree_host_size_t source_offset, iree_hal_buffer_ref_t target_buffer) { return iree_hal_buffer_map_write( - target_buffer, target_offset, - (const uint8_t*)source_buffer + source_offset, length); + target_buffer.buffer, target_buffer.offset, + (const uint8_t*)source_buffer + source_offset, target_buffer.length); } //===----------------------------------------------------------------------===// @@ -339,11 +341,10 @@ static iree_status_t quidditch_command_buffer_update_buffer( static iree_status_t quidditch_command_buffer_copy_buffer( iree_hal_command_buffer_t* base_command_buffer, - iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset, - iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset, - iree_device_size_t length) { - return iree_hal_buffer_map_copy(source_buffer, source_offset, target_buffer, - target_offset, length); + iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref) { + return iree_hal_buffer_map_copy(source_ref.buffer, source_ref.offset, + target_ref.buffer, target_ref.offset, + target_ref.length); } //===----------------------------------------------------------------------===// @@ -353,8 +354,8 @@ static iree_status_t quidditch_command_buffer_copy_buffer( static iree_status_t quidditch_command_buffer_collective( iree_hal_command_buffer_t* base_command_buffer, iree_hal_channel_t* channel, iree_hal_collective_op_t op, uint32_t param, - iree_hal_buffer_binding_t send_binding, - iree_hal_buffer_binding_t recv_binding, iree_device_size_t element_count) { + iree_hal_buffer_ref_t send_binding, iree_hal_buffer_ref_t recv_binding, + iree_device_size_t element_count) { return iree_make_status(IREE_STATUS_UNIMPLEMENTED, "collectives not yet implemented on CPU"); } @@ -393,8 +394,7 @@ static iree_status_t quidditch_command_buffer_push_constants( static iree_status_t quidditch_command_buffer_push_descriptor_set( iree_hal_command_buffer_t* base_command_buffer, iree_hal_pipeline_layout_t* pipeline_layout, uint32_t set, - iree_host_size_t binding_count, - const iree_hal_descriptor_set_binding_t* bindings) { + iree_host_size_t binding_count, const iree_hal_buffer_ref_t* bindings) { quidditch_command_buffer_t* command_buffer = quidditch_command_buffer_cast(base_command_buffer); @@ -406,12 +406,12 @@ static iree_status_t quidditch_command_buffer_push_descriptor_set( iree_host_size_t binding_base = set * IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT; for (iree_host_size_t i = 0; i < binding_count; ++i) { - if (IREE_UNLIKELY(bindings[i].binding >= + if (IREE_UNLIKELY(bindings[i].ordinal >= IREE_HAL_LOCAL_MAX_DESCRIPTOR_BINDING_COUNT)) { return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "buffer binding index out of bounds"); } - iree_host_size_t binding_ordinal = binding_base + bindings[i].binding; + iree_host_size_t binding_ordinal = binding_base + bindings[i].ordinal; // TODO(benvanik): track mapping so we can properly map/unmap/flush/etc. iree_hal_buffer_mapping_t buffer_mapping = {{0}}; @@ -555,13 +555,12 @@ typedef union iree_hal_vec3_t { static iree_status_t quidditch_command_buffer_dispatch_indirect( iree_hal_command_buffer_t* base_command_buffer, iree_hal_executable_t* executable, int32_t entry_point, - iree_hal_buffer_t* workgroups_buffer, - iree_device_size_t workgroups_offset) { + iree_hal_buffer_ref_t workgroups_ref) { // TODO(benvanik): track mapping so we can properly map/unmap/flush/etc. iree_hal_buffer_mapping_t buffer_mapping = {{0}}; IREE_RETURN_IF_ERROR(iree_hal_buffer_map_range( - workgroups_buffer, IREE_HAL_MAPPING_MODE_PERSISTENT, - IREE_HAL_MEMORY_ACCESS_READ, workgroups_offset, 3 * sizeof(uint32_t), + workgroups_ref.buffer, IREE_HAL_MAPPING_MODE_PERSISTENT, + IREE_HAL_MEMORY_ACCESS_READ, workgroups_ref.offset, 3 * sizeof(uint32_t), &buffer_mapping)); iree_hal_vec3_t workgroup_count = *(const iree_hal_vec3_t*)buffer_mapping.contents.data; @@ -570,22 +569,6 @@ static iree_status_t quidditch_command_buffer_dispatch_indirect( workgroup_count.y, workgroup_count.z); } -//===----------------------------------------------------------------------===// -// iree_hal_command_buffer_execute_commands -//===----------------------------------------------------------------------===// - -static iree_status_t quidditch_command_buffer_execute_commands( - iree_hal_command_buffer_t* base_command_buffer, - iree_hal_command_buffer_t* base_commands, - iree_hal_buffer_binding_table_t binding_table) { - // TODO(#10144): decide how to execute the inline command buffer; it is - // definitely a deferred command buffer but we don't want to force that - // dependency here. We could allow injection of a function to call to execute - // command buffers so that the device can decide how it wants to handle them. - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "indirect command buffers not yet implemented"); -} - //===----------------------------------------------------------------------===// // iree_hal_command_buffer_vtable_t //===----------------------------------------------------------------------===// @@ -610,5 +593,4 @@ static const iree_hal_command_buffer_vtable_t quidditch_command_buffer_vtable = .push_descriptor_set = quidditch_command_buffer_push_descriptor_set, .dispatch = quidditch_command_buffer_dispatch, .dispatch_indirect = quidditch_command_buffer_dispatch_indirect, - .execute_commands = quidditch_command_buffer_execute_commands, }; diff --git a/runtime/runtime/src/Quidditch/device/device.c b/runtime/runtime/src/Quidditch/device/device.c index 6c07dce..dd7b45c 100644 --- a/runtime/runtime/src/Quidditch/device/device.c +++ b/runtime/runtime/src/Quidditch/device/device.c @@ -239,8 +239,9 @@ static iree_status_t quidditch_device_create_command_buffer( } else { quidditch_device_t* device = quidditch_device_cast(base_device); return iree_hal_deferred_command_buffer_create( - base_device, mode, command_categories, binding_capacity, - &device->large_block_pool, device->host_allocator, out_command_buffer); + iree_hal_device_allocator(base_device), mode, command_categories, + binding_capacity, &device->large_block_pool, device->host_allocator, + out_command_buffer); } } @@ -340,7 +341,8 @@ static iree_status_t quidditch_device_queue_dealloca( static iree_status_t quidditch_device_apply_deferred_command_buffers( quidditch_device_t* device, iree_host_size_t command_buffer_count, - iree_hal_command_buffer_t* const* command_buffers) { + iree_hal_command_buffer_t* const* command_buffers, + iree_hal_buffer_binding_table_t const* binding_tables) { // See if there are any deferred command buffers; this saves us work in cases // of pure inline execution. bool any_deferred = false; @@ -361,12 +363,21 @@ static iree_status_t quidditch_device_apply_deferred_command_buffers( // if they mixed the two modes together! for (iree_host_size_t i = 0; i < command_buffer_count; ++i) { iree_hal_command_buffer_t* command_buffer = command_buffers[i]; + iree_hal_buffer_binding_table_t binding_table = + binding_tables ? binding_tables[i] + : iree_hal_buffer_binding_table_empty(); if (iree_hal_deferred_command_buffer_isa(command_buffer)) { iree_hal_command_buffer_t* inline_command_buffer = NULL; IREE_RETURN_IF_ERROR(quidditch_command_buffer_initialize( (iree_hal_device_t*)device, iree_hal_command_buffer_mode(command_buffer) | - IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION, + IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT | + IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION | + // NOTE: we need to validate if a binding table is provided as the + // bindings were not known when it was originally recorded. + (iree_hal_buffer_binding_table_is_empty(binding_table) + ? IREE_HAL_COMMAND_BUFFER_MODE_UNVALIDATED + : 0), IREE_HAL_COMMAND_CATEGORY_ANY, IREE_HAL_QUEUE_AFFINITY_ANY, /*binding_capacity=*/0, device->host_allocator, storage, &inline_command_buffer)); @@ -428,7 +439,8 @@ static iree_status_t quidditch_device_queue_execute( const iree_hal_semaphore_list_t wait_semaphore_list, const iree_hal_semaphore_list_t signal_semaphore_list, iree_host_size_t command_buffer_count, - iree_hal_command_buffer_t* const* command_buffers) { + iree_hal_command_buffer_t* const* command_buffers, + iree_hal_buffer_binding_table_t const* binding_tables) { quidditch_device_t* device = quidditch_device_cast(base_device); // TODO(#4680): there is some better error handling here needed; we should @@ -444,7 +456,7 @@ static iree_status_t quidditch_device_queue_execute( // Run all deferred command buffers - any we could have run inline we already // did during recording. IREE_RETURN_IF_ERROR(quidditch_device_apply_deferred_command_buffers( - device, command_buffer_count, command_buffers)); + device, command_buffer_count, command_buffers, binding_tables)); // Signal all semaphores now that batch work has completed. IREE_RETURN_IF_ERROR(quidditch_semaphore_multi_signal(