diff --git a/runtime/runtime/src/Quidditch/dispatch/dispatch.c b/runtime/runtime/src/Quidditch/dispatch/dispatch.c index 37b5361..dea494c 100644 --- a/runtime/runtime/src/Quidditch/dispatch/dispatch.c +++ b/runtime/runtime/src/Quidditch/dispatch/dispatch.c @@ -1,17 +1,19 @@ #include "dispatch.h" +#include + #include #include #include +#include #include +#include #include #include #include #include -#include "iree/base/alignment.h" - // TODO: This should be cluster local. static struct worker_metadata_t { atomic_uint workers_waiting; @@ -67,10 +69,17 @@ int quidditch_dispatch_enter_worker_loop() { while (!worker_metadata.exit) { park_worker(); if (worker_metadata.exit) break; - + + read_csr(mcycle); if (configuredKernel(configuredEnvironment, configuredDispatchState, &configuredWorkgroupState[snrt_cluster_core_idx()])) error = true; + + // Required to make sure that we only read the mcycle once the FPU has + // actually finished. Otherwise, we are measuring cycles that are too short! + // This is only required for measurement, not in real programs. + snrt_fpu_fence(); + read_csr(mcycle); } snrt_interrupt_disable(IRQ_M_CLUSTER); diff --git a/runtime/samples/nsnet2/CMakeLists.txt b/runtime/samples/nsnet2/CMakeLists.txt index 5c2904c..fe36ded 100644 --- a/runtime/samples/nsnet2/CMakeLists.txt +++ b/runtime/samples/nsnet2/CMakeLists.txt @@ -9,4 +9,5 @@ target_link_libraries( samples_util nsnet2_module snRuntime + Quidditch::dispatch::dispatch ) diff --git a/runtime/samples/nsnet2/main.c b/runtime/samples/nsnet2/main.c index b77da64..7173a05 100644 --- a/runtime/samples/nsnet2/main.c +++ b/runtime/samples/nsnet2/main.c @@ -1,9 +1,13 @@ +#include + #include #include #include #include int main() { + if (!snrt_is_dm_core()) return quidditch_dispatch_enter_worker_loop(); + float data[161]; for (int i = 0; i < IREE_ARRAYSIZE(data); i++) { diff --git a/runtime/samples/util/run_model.c b/runtime/samples/util/run_model.c index ea6bb4e..7a5aca6 100644 --- a/runtime/samples/util/run_model.c +++ b/runtime/samples/util/run_model.c @@ -71,12 +71,6 @@ static iree_status_t setup_instance_and_device( } iree_status_t run_model(const model_config_t* config) { - if (!snrt_is_dm_core()) { - int ret = quidditch_dispatch_enter_worker_loop(); - if (!ret) return iree_ok_status(); - return iree_make_status(IREE_STATUS_UNKNOWN); - } - iree_allocator_t host_allocator = iree_allocator_system(); iree_vm_instance_t* vmInstance; diff --git a/runtime/samples/vec_multiply/CMakeLists.txt b/runtime/samples/vec_multiply/CMakeLists.txt index e081c5a..0f8bd22 100644 --- a/runtime/samples/vec_multiply/CMakeLists.txt +++ b/runtime/samples/vec_multiply/CMakeLists.txt @@ -7,5 +7,6 @@ target_link_libraries( samples_util simple_add_module snRuntime + Quidditch::dispatch::dispatch ) diff --git a/runtime/samples/vec_multiply/main.c b/runtime/samples/vec_multiply/main.c index 3116429..4e20342 100644 --- a/runtime/samples/vec_multiply/main.c +++ b/runtime/samples/vec_multiply/main.c @@ -1,3 +1,5 @@ +#include + #include #include #include @@ -5,6 +7,7 @@ int main() { double data[4]; + if (!snrt_is_dm_core()) return quidditch_dispatch_enter_worker_loop(); for (int i = 0; i < IREE_ARRAYSIZE(data); i++) { data[i] = (i + 1); diff --git a/runtime/snitch_cluster/api/ssr_decls.h b/runtime/snitch_cluster/api/ssr_decls.h new file mode 100644 index 0000000..55b9003 --- /dev/null +++ b/runtime/snitch_cluster/api/ssr_decls.h @@ -0,0 +1,4 @@ + +#pragma once + +void snrt_fpu_fence(); diff --git a/runtime/snitch_cluster/rtl/src/snrt.c b/runtime/snitch_cluster/rtl/src/snrt.c index c25de2a..dd63f50 100644 --- a/runtime/snitch_cluster/rtl/src/snrt.c +++ b/runtime/snitch_cluster/rtl/src/snrt.c @@ -10,8 +10,11 @@ #include "riscv.c" #include "snitch_cluster_memory.c" #include "snitch_cluster_start.c" +#include "ssr.h" #include "stack_decls.h" #include "sync.c" #include "team.c" +extern void snrt_fpu_fence(); + uint32_t snrt_get_stack_size_per_core() { return 1 << SNRT_LOG2_STACK_SIZE; }