diff --git a/hardware/scripts/gen_benchmark_table.py b/hardware/scripts/gen_benchmark_table.py index 62fb90f55..91e3c98bb 100644 --- a/hardware/scripts/gen_benchmark_table.py +++ b/hardware/scripts/gen_benchmark_table.py @@ -15,6 +15,7 @@ import numpy as np import re + def create_dataframe(directory: str): keys = ['cycles', 'max_cycles', @@ -75,14 +76,19 @@ def create_dataframe(directory: str): filetext = open(filename).read() values = [] for key in keys: - values.append(re.findall(r'\b%s\b\s*[+-]?([0-9]*[.]?[0-9]+)' %(key), filetext)) + values.append( + re.findall( + r'\b%s\b\s*[+-]?([0-9]*[.]?[0-9]+)' % + (key), filetext)) df[subdir] = (np.asarray(values)).flatten() return df -def main (): + +def main(): script_path = pathlib.Path(__file__).parent.absolute() # Parse arguments - parser = argparse.ArgumentParser(description='Extract performance data from log files') + parser = argparse.ArgumentParser( + description='Extract performance data from log files') parser.add_argument( "-i", "--input", @@ -109,5 +115,6 @@ def main (): df = create_dataframe(args.input) df.to_excel(os.path.join(args.output, 'table.xls')) + if __name__ == "__main__": main() diff --git a/software/apps/barriers_test/data_barriers_test.h b/software/apps/barriers_test/data_barriers_test.h new file mode 100644 index 000000000..c5fa60839 --- /dev/null +++ b/software/apps/barriers_test/data_barriers_test.h @@ -0,0 +1,85 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+// SPDX-License-Identifier: Apache-2.0 + +uint32_t core_delays[1024] = {}; diff --git a/software/apps/barriers_test/data_barriers_test.py b/software/apps/barriers_test/data_barriers_test.py index 82c2c53ef..190f03eea 100755 --- a/software/apps/barriers_test/data_barriers_test.py +++ b/software/apps/barriers_test/data_barriers_test.py @@ -14,13 +14,18 @@ # compute_result # ################## -def gen_data_header_file(outdir: pathlib.Path.cwd(), tpl: pathlib.Path.cwd(), **kwargs): + +def gen_data_header_file( + outdir: pathlib.Path.cwd(), + tpl: pathlib.Path.cwd(), + **kwargs): file = outdir / f"data_{kwargs['name']}.h" print(tpl, outdir, kwargs['name']) template = Template(filename=str(tpl)) with file.open('w') as f: f.write(template.render(**kwargs)) + def main(): parser = argparse.ArgumentParser(description='Generate data for kernels') parser.add_argument( @@ -78,20 +83,27 @@ def main(): default=1024, help='Max delay.' ) + args = parser.parse_args() num_cores = args.num_cores - ## Weybull distribution + + # Weibull distribution # a = args.a_par # D = args.d_par # delays = D * np.random.weibull(a, size=num_cores) # delays = np.asarray(delays, dtype = 'int') + # Uniform max_delay = args.max delays = np.random.uniform(low=0.0, high=max_delay, size=num_cores) - delays = np.asarray(delays, dtype = 'int') + delays = np.asarray(delays, dtype='int') - kwargs = {'name': 'barriers_test', 'delays': delays, 'num_cores' : num_cores} + kwargs = { + 'name': 'barriers_test', + 'delays': delays, + 'num_cores': num_cores} gen_data_header_file(args.outdir, args.tpl, **kwargs) + if __name__ == "__main__": main() diff --git a/software/runtime/synchronization.c b/software/runtime/synchronization.c index 15b2f54c1..7037d74f8 100644 --- a/software/runtime/synchronization.c +++ b/software/runtime/synchronization.c @@ -10,10 +10,13 @@ #include "runtime.h" #include "synchronization.h" -#if NUM_CORES == (256) +#if NUM_CORES == (16) +#define LOG2_NUM_CORES (4) +#elif NUM_CORES == (256) 
#define LOG2_NUM_CORES (8) #elif NUM_CORES == (1024) #define LOG2_NUM_CORES (10) + #endif uint32_t volatile barrier __attribute__((section(".l1"))); @@ -43,6 +46,11 @@ void mempool_barrier_init(uint32_t core_id) { /* PLAIN BARRIER */ +/** + @brief Central counter barrier + @param[in] num_cores Number of cores arriving at the barrier + @return none +*/ void mempool_barrier(uint32_t num_cores) { // Increment the barrier counter if ((num_cores - 1) == __atomic_fetch_add(&barrier, 1, __ATOMIC_RELAXED)) { @@ -55,6 +63,15 @@ void mempool_barrier(uint32_t num_cores) { mempool_wfi(); } +/** + @brief Central counter barrier with stride and offset + @param[in] barrier Pointer to the barrier variable (can be assigned + locally depending on the offset) + @param[in] num_cores Number of cores arriving at the barrier + @param[in] stride Stride between cores to wake up + @param[in] offset ID of the first core involved in the barrier + @return none +*/ void mempool_strided_barrier(uint32_t *barrier, uint32_t num_cores, uint32_t stride, uint32_t offset) { @@ -73,6 +90,12 @@ void mempool_strided_barrier(uint32_t *barrier, uint32_t num_cores, /* LOG BARRIER */ +/** + @brief Log2 tree barrier + @param[in] step Step of the logarithmic tree (must be set to 2) + @param[in] core_id ID of the core arriving at the barrier + @return none +*/ void mempool_log_barrier(uint32_t step, uint32_t core_id) { uint32_t idx = (step * (core_id / step)) * 4; uint32_t next_step, previous_step; @@ -95,9 +118,18 @@ void mempool_log_barrier(uint32_t step, uint32_t core_id) { mempool_wfi(); } +/** + @brief Tree barrier with any radix. In each step a central counter + barrier is used. + @param[in] radix log2(barrier radix), e.g. radix 2 -> 1 + @param[in] core_id ID of the core arriving at the barrier + @return none +*/ void mempool_anyradixlog_barrier(uint32_t radix, uint32_t core_id) { uint32_t num_cores = mempool_get_core_count(); - uint32_t first_step = (LOG2_NUM_CORES % radix) == 0 ? 
(1U << radix) : 1U << (LOG2_NUM_CORES % radix); + uint32_t first_step = (LOG2_NUM_CORES % radix) == 0 + ? (1U << radix) + : 1U << (LOG2_NUM_CORES % radix); uint32_t step = 0, previous_step = 0; // At first step you take care of the remainder uint32_t idx = (first_step * (core_id / first_step)) * 4; @@ -131,6 +163,13 @@ void mempool_anyradixlog_barrier(uint32_t radix, uint32_t core_id) { mempool_wfi(); } +/** + @brief Central counter barrier on a subset of cores + log2 tree + barrier + @param[in] step Number of cores in central counter barrier + @param[in] core_id ID of the core arriving at the barrier + @return none +*/ void mempool_linlog_barrier(uint32_t step, uint32_t core_id) { uint32_t idx = (step * (core_id / step)) * 4; @@ -154,6 +193,13 @@ void mempool_linlog_barrier(uint32_t step, uint32_t core_id) { mempool_wfi(); } +/** + @brief Log2 tree barrier with stride and offset + @param[in] step Step of the logarithmic tree (must be set to 2) + @param[in] stride Stride between cores to wake up + @param[in] offset ID of the first core involved in the barrier + @return none +*/ void mempool_strided_log_barrier(uint32_t step, uint32_t core_id, uint32_t stride, uint32_t offset) { @@ -185,6 +231,13 @@ void mempool_strided_log_barrier(uint32_t step, uint32_t core_id, /* PARTIAL BARRIER */ +/** + @brief Log2 tree barrier on a subset of cores + @param[in] step Step of the logarithmic tree (must be set to 2) + @param[in] core_id ID of the first core involved in the barrier + @param[in] num_cores_barrier Number of cores involved in the barrier + @return none +*/ void mempool_log_partial_barrier(uint32_t step, uint32_t core_id, uint32_t num_cores_barrier) { @@ -235,6 +288,14 @@ void mempool_log_partial_barrier(uint32_t step, uint32_t core_id, } } +/** + @brief Central counter barrier on a subset of cores + @param[in] core_id ID of the core arriving at the barrier + @param[in] core_init First core involved in the barrier + @param[in] num_sleeping_cores Number of cores 
involved in the barrier + @param[in] memloc Location of the barrier variable + @return none +*/ void mempool_partial_barrier(uint32_t volatile core_id, uint32_t volatile core_init, uint32_t volatile num_sleeping_cores,