From ceb78c64a370b9e65154c0529738db734168c070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20B=C3=B6ck?= Date: Wed, 14 Aug 2024 12:49:09 +0100 Subject: [PATCH] [samples] Speed up simulation by importing buffers This avoids allocating and then copying the input data. Mostly useful in short running tests where the VM overhead dominates. --- runtime/samples/nsnet2/nsnet2_util.c | 2 +- runtime/samples/util/run_model.c | 30 ++++++++++++++++++---------- runtime/samples/util/run_model.h | 3 ++- runtime/samples/vec_multiply/main.c | 2 +- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/runtime/samples/nsnet2/nsnet2_util.c b/runtime/samples/nsnet2/nsnet2_util.c index e95c6de..874f5fe 100644 --- a/runtime/samples/nsnet2/nsnet2_util.c +++ b/runtime/samples/nsnet2/nsnet2_util.c @@ -14,7 +14,7 @@ int run_nsnet2_experiment( iree_hal_executable_library_query_fn_t implementation) { if (!snrt_is_dm_core()) return quidditch_dispatch_enter_worker_loop(); - double(*data)[161] = malloc(161 * sizeof(double)); + double(*data)[161] = aligned_alloc(64, 161 * sizeof(double)); for (int i = 0; i < IREE_ARRAYSIZE(*data); i++) { (*data)[i] = (i + 1); diff --git a/runtime/samples/util/run_model.c b/runtime/samples/util/run_model.c index 5729038..3138fc8 100644 --- a/runtime/samples/util/run_model.c +++ b/runtime/samples/util/run_model.c @@ -91,10 +91,13 @@ iree_status_t run_model(const model_config_t* config) { if (!iree_status_is_ok(result)) goto error_release_context; for (iree_host_size_t i = 0; i < config->num_inputs; i++) { - iree_const_byte_span_t span = iree_make_const_byte_span( - config->input_data[i], - config->input_sizes[i] * - iree_hal_element_dense_byte_count(config->element_type)); + iree_hal_external_buffer_t external_buffer = { + .type = IREE_HAL_EXTERNAL_BUFFER_TYPE_HOST_ALLOCATION, + .flags = IREE_HAL_EXTERNAL_BUFFER_FLAG_NONE, + .size = config->input_sizes[i] * + iree_hal_element_dense_byte_count(config->element_type), + .handle.host_allocation = 
{(void*)config->input_data[i]}, + }; iree_hal_buffer_params_t params = { .usage = IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE, @@ -103,15 +106,20 @@ iree_status_t run_model(const model_config_t* config) { }; iree_hal_buffer_params_canonicalize(&params); - iree_hal_buffer_view_t* buffer = NULL; - result = iree_hal_buffer_view_allocate_buffer_copy( - device, iree_hal_device_allocator(device), config->input_ranks[i], - config->input_shapes[i], config->element_type, - IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, params, span, &buffer); - if (!iree_status_is_ok(result)) goto error_release_context; + iree_hal_buffer_t* buffer = NULL; + IREE_CHECK_OK(iree_hal_allocator_import_buffer( + iree_hal_device_allocator(device), params, &external_buffer, + iree_hal_buffer_release_callback_null(), &buffer)); + + iree_hal_buffer_view_t* buffer_view = NULL; + IREE_CHECK_OK(iree_hal_buffer_view_create( + buffer, config->input_ranks[i], config->input_shapes[i], + config->element_type, IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, + host_allocator, &buffer_view)); + iree_hal_buffer_release(buffer); iree_vm_ref_t arg_buffer_view_ref; - arg_buffer_view_ref = iree_hal_buffer_view_move_ref(buffer); + arg_buffer_view_ref = iree_hal_buffer_view_move_ref(buffer_view); result = iree_vm_list_push_ref_retain(inputs, &arg_buffer_view_ref); if (!iree_status_is_ok(result)) goto error_release_context; } diff --git a/runtime/samples/util/run_model.h b/runtime/samples/util/run_model.h index 6c3f0b6..d09879c 100644 --- a/runtime/samples/util/run_model.h +++ b/runtime/samples/util/run_model.h @@ -24,7 +24,8 @@ typedef struct { /// Number of input tensors. iree_host_size_t num_inputs; - /// Input tensor data in dense row major encoding. + /// Input tensor data in dense row major encoding. Must be aligned to 64 + /// bytes. const void** input_data; /// Number of elements for each input in 'input_data'. 
const iree_host_size_t* input_sizes; diff --git a/runtime/samples/vec_multiply/main.c b/runtime/samples/vec_multiply/main.c index d9af40f..aa4913a 100644 --- a/runtime/samples/vec_multiply/main.c +++ b/runtime/samples/vec_multiply/main.c @@ -6,7 +6,7 @@ #include int main() { - double data[4]; + iree_alignas(64) double data[4]; if (!snrt_is_dm_core()) return quidditch_dispatch_enter_worker_loop(); for (int i = 0; i < IREE_ARRAYSIZE(data); i++) {