diff --git a/hardware/scripts/gen_benchmark_table.py b/hardware/scripts/gen_benchmark_table.py index 62fb90f55..91e3c98bb 100644 --- a/hardware/scripts/gen_benchmark_table.py +++ b/hardware/scripts/gen_benchmark_table.py @@ -15,6 +15,7 @@ import numpy as np import re + def create_dataframe(directory: str): keys = ['cycles', 'max_cycles', @@ -75,14 +76,19 @@ def create_dataframe(directory: str): filetext = open(filename).read() values = [] for key in keys: - values.append(re.findall(r'\b%s\b\s*[+-]?([0-9]*[.]?[0-9]+)' %(key), filetext)) + values.append( + re.findall( + r'\b%s\b\s*[+-]?([0-9]*[.]?[0-9]+)' % + (key), filetext)) df[subdir] = (np.asarray(values)).flatten() return df -def main (): + +def main(): script_path = pathlib.Path(__file__).parent.absolute() # Parse arguments - parser = argparse.ArgumentParser(description='Extract performance data from log files') + parser = argparse.ArgumentParser( + description='Extract performance data from log files') parser.add_argument( "-i", "--input", @@ -109,5 +115,6 @@ def main (): df = create_dataframe(args.input) df.to_excel(os.path.join(args.output, 'table.xls')) + if __name__ == "__main__": main() diff --git a/software/apps/barriers_test/data_barriers_test.h b/software/apps/barriers_test/data_barriers_test.h new file mode 100644 index 000000000..c5fa60839 --- /dev/null +++ b/software/apps/barriers_test/data_barriers_test.h @@ -0,0 +1,85 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+// SPDX-License-Identifier: Apache-2.0 + +uint32_t core_delays[1024] = { + 381, 327, 268, 849, 487, 748, 878, 412, 860, 889, 338, 310, 90, + 479, 497, 422, 871, 834, 532, 785, 51, 824, 682, 137, 198, 183, + 981, 17, 946, 59, 1, 859, 774, 70, 25, 484, 19, 364, 628, + 172, 275, 24, 791, 67, 376, 621, 726, 994, 737, 279, 602, 461, + 891, 389, 1017, 757, 973, 61, 505, 825, 727, 565, 251, 524, 436, + 30, 63, 216, 520, 88, 157, 324, 553, 594, 746, 749, 419, 715, + 985, 305, 969, 619, 420, 58, 1010, 524, 801, 923, 352, 56, 762, + 905, 735, 583, 56, 175, 672, 540, 665, 316, 550, 43, 758, 114, + 198, 809, 891, 326, 200, 747, 381, 462, 615, 976, 335, 587, 207, + 873, 913, 826, 522, 722, 764, 118, 606, 105, 1011, 374, 284, 847, + 766, 824, 171, 653, 925, 822, 176, 665, 407, 40, 128, 483, 20, + 290, 863, 378, 25, 277, 485, 578, 297, 723, 334, 567, 82, 209, + 115, 451, 468, 272, 135, 98, 307, 632, 147, 693, 231, 415, 772, + 273, 350, 424, 837, 663, 170, 954, 654, 169, 845, 76, 152, 492, + 88, 169, 529, 257, 890, 672, 569, 118, 885, 586, 232, 108, 942, + 849, 134, 775, 857, 747, 778, 507, 468, 266, 491, 455, 1023, 812, + 580, 737, 416, 474, 896, 829, 136, 836, 926, 470, 567, 419, 543, + 928, 691, 270, 384, 567, 401, 222, 279, 482, 814, 401, 13, 172, + 263, 188, 706, 413, 671, 433, 574, 648, 283, 885, 287, 698, 165, + 274, 220, 329, 989, 708, 355, 697, 427, 432, 838, 838, 527, 697, + 218, 167, 230, 816, 775, 479, 158, 162, 925, 580, 781, 450, 690, + 476, 292, 793, 624, 112, 553, 410, 953, 707, 608, 365, 691, 939, + 196, 801, 699, 180, 758, 457, 485, 882, 278, 142, 988, 867, 593, + 500, 816, 656, 36, 659, 464, 57, 192, 399, 305, 162, 305, 253, + 181, 325, 523, 18, 821, 171, 932, 982, 253, 313, 545, 514, 466, + 943, 892, 618, 765, 710, 35, 539, 253, 853, 907, 871, 609, 200, + 20, 743, 780, 732, 5, 616, 285, 484, 786, 227, 842, 990, 688, + 731, 660, 578, 707, 9, 936, 248, 218, 421, 828, 567, 768, 527, + 836, 805, 853, 836, 218, 88, 511, 764, 874, 644, 396, 213, 870, + 769, 221, 
232, 744, 950, 21, 275, 528, 919, 620, 529, 712, 799, + 0, 713, 30, 403, 841, 257, 342, 58, 794, 882, 1014, 8, 406, + 737, 556, 173, 498, 726, 429, 426, 343, 520, 731, 1021, 89, 487, + 533, 300, 347, 246, 328, 234, 279, 953, 911, 408, 651, 571, 48, + 166, 36, 577, 80, 136, 69, 730, 507, 442, 502, 565, 953, 858, + 507, 511, 334, 619, 367, 305, 608, 568, 166, 608, 249, 383, 24, + 785, 722, 633, 31, 229, 949, 582, 897, 147, 295, 390, 242, 220, + 294, 68, 893, 321, 374, 336, 919, 262, 910, 603, 956, 105, 650, + 237, 151, 624, 421, 250, 695, 565, 770, 765, 765, 372, 836, 850, + 207, 612, 124, 662, 716, 745, 720, 1018, 272, 209, 100, 685, 198, + 729, 849, 461, 142, 1005, 416, 752, 100, 754, 971, 298, 363, 308, + 64, 390, 888, 593, 599, 97, 388, 746, 259, 428, 745, 939, 124, + 853, 98, 825, 229, 254, 233, 103, 499, 270, 628, 161, 369, 140, + 33, 615, 33, 646, 52, 505, 529, 1023, 734, 940, 992, 396, 43, + 463, 27, 416, 760, 90, 145, 876, 343, 650, 971, 150, 827, 930, + 897, 419, 666, 83, 1019, 81, 553, 108, 513, 634, 839, 882, 692, + 485, 550, 995, 864, 243, 258, 461, 968, 257, 154, 211, 406, 414, + 198, 823, 570, 256, 490, 137, 1018, 321, 456, 269, 79, 986, 794, + 265, 454, 749, 36, 888, 758, 18, 890, 993, 831, 438, 320, 110, + 398, 253, 944, 986, 696, 315, 491, 46, 979, 808, 466, 985, 486, + 267, 117, 1021, 557, 856, 178, 326, 952, 391, 1, 670, 636, 992, + 773, 483, 366, 765, 344, 737, 194, 1011, 521, 947, 63, 526, 535, + 516, 216, 134, 938, 74, 345, 864, 880, 576, 584, 525, 797, 759, + 27, 467, 622, 741, 545, 225, 34, 908, 403, 244, 997, 827, 81, + 717, 436, 906, 264, 27, 400, 968, 532, 365, 554, 228, 271, 944, + 774, 202, 138, 491, 34, 807, 53, 323, 785, 157, 144, 261, 613, + 444, 148, 482, 76, 372, 493, 634, 428, 756, 885, 229, 436, 855, + 256, 811, 146, 23, 542, 575, 921, 778, 669, 154, 913, 396, 377, + 290, 325, 532, 518, 406, 861, 204, 594, 64, 261, 369, 925, 531, + 1007, 666, 94, 323, 253, 330, 853, 932, 651, 404, 310, 477, 353, + 903, 339, 573, 210, 423, 548, 
563, 285, 417, 972, 437, 87, 720, + 502, 585, 585, 366, 224, 737, 293, 672, 385, 289, 657, 259, 302, + 830, 788, 512, 248, 997, 655, 119, 699, 573, 266, 688, 343, 67, + 906, 438, 49, 701, 480, 928, 954, 1, 239, 350, 924, 808, 547, + 278, 131, 754, 924, 13, 32, 645, 272, 304, 743, 517, 473, 1014, + 592, 616, 434, 50, 200, 103, 563, 680, 274, 535, 271, 726, 270, + 72, 237, 450, 929, 396, 82, 404, 868, 709, 497, 954, 400, 996, + 506, 626, 236, 713, 927, 385, 278, 399, 898, 230, 625, 749, 620, + 827, 908, 541, 523, 759, 143, 436, 152, 611, 693, 576, 893, 245, + 462, 396, 297, 566, 774, 176, 393, 948, 1, 173, 313, 577, 963, + 942, 748, 845, 261, 189, 443, 1005, 910, 786, 993, 256, 972, 509, + 387, 996, 13, 106, 348, 780, 323, 330, 454, 674, 51, 706, 135, + 49, 618, 77, 472, 70, 156, 228, 583, 557, 525, 123, 951, 515, + 573, 309, 509, 899, 935, 728, 744, 831, 83, 414, 319, 447, 825, + 194, 662, 213, 36, 94, 860, 776, 477, 275, 244, 765, 518, 748, + 318, 165, 132, 750, 885, 699, 807, 453, 824, 438, 305, 373, 645, + 1017, 276, 151, 538, 959, 285, 14, 798, 794, 770, 853, 234, 274, + 468, 107, 268, 1, 730, 464, 249, 980, 19, 162, 923, 936, 150, + 549, 976, 645, 438, 63, 440, 61, 106, 804, 808, 542, 353, 955, + 645, 210, 1015, 107, 171, 860, 784, 54, 745, 626, +}; diff --git a/software/apps/barriers_test/data_barriers_test.py b/software/apps/barriers_test/data_barriers_test.py index 82c2c53ef..190f03eea 100755 --- a/software/apps/barriers_test/data_barriers_test.py +++ b/software/apps/barriers_test/data_barriers_test.py @@ -14,13 +14,18 @@ # compute_result # ################## -def gen_data_header_file(outdir: pathlib.Path.cwd(), tpl: pathlib.Path.cwd(), **kwargs): + +def gen_data_header_file( + outdir: pathlib.Path.cwd(), + tpl: pathlib.Path.cwd(), + **kwargs): file = outdir / f"data_{kwargs['name']}.h" print(tpl, outdir, kwargs['name']) template = Template(filename=str(tpl)) with file.open('w') as f: f.write(template.render(**kwargs)) + def main(): parser = 
argparse.ArgumentParser(description='Generate data for kernels') parser.add_argument( @@ -78,20 +83,27 @@ def main(): default=1024, help='Max delay.' ) + args = parser.parse_args() num_cores = args.num_cores - ## Weybull distribution + + # Weibull distribution # a = args.a_par # D = args.d_par # delays = D * np.random.weibull(a, size=num_cores) # delays = np.asarray(delays, dtype = 'int') + # Uniform max_delay = args.max delays = np.random.uniform(low=0.0, high=max_delay, size=num_cores) - delays = np.asarray(delays, dtype = 'int') + delays = np.asarray(delays, dtype='int') - kwargs = {'name': 'barriers_test', 'delays': delays, 'num_cores' : num_cores} + kwargs = { + 'name': 'barriers_test', + 'delays': delays, + 'num_cores': num_cores} gen_data_header_file(args.outdir, args.tpl, **kwargs) + if __name__ == "__main__": main() diff --git a/software/runtime/synchronization.c b/software/runtime/synchronization.c index 15b2f54c1..7037d74f8 100644 --- a/software/runtime/synchronization.c +++ b/software/runtime/synchronization.c @@ -10,10 +10,13 @@ #include "runtime.h" #include "synchronization.h" -#if NUM_CORES == (256) +#if NUM_CORES == (16) +#define LOG2_NUM_CORES (4) +#elif NUM_CORES == (256) #define LOG2_NUM_CORES (8) #elif NUM_CORES == (1024) #define LOG2_NUM_CORES (10) + #endif uint32_t volatile barrier __attribute__((section(".l1"))); @@ -43,6 +46,11 @@ void mempool_barrier_init(uint32_t core_id) { /* PLAIN BARRIER */ +/** + @brief Central counter barrier + @param[in] num_cores Number of cores arriving at the barrier + @return none +*/ void mempool_barrier(uint32_t num_cores) { // Increment the barrier counter if ((num_cores - 1) == __atomic_fetch_add(&barrier, 1, __ATOMIC_RELAXED)) { @@ -55,6 +63,15 @@ void mempool_barrier(uint32_t num_cores) { mempool_wfi(); } +/** + @brief Central counter barrier with stride and offset + @param[in] barrier Pointer to the barrier variable (can be assigned + locally depending on the offset) + @param[in] num_cores Number of cores
arriving at the barrier + @param[in] stride Stride between cores to wake up + @param[in] offset ID of the first core involved in the barrier + @return none +*/ void mempool_strided_barrier(uint32_t *barrier, uint32_t num_cores, uint32_t stride, uint32_t offset) { @@ -73,6 +90,12 @@ void mempool_strided_barrier(uint32_t *barrier, uint32_t num_cores, /* LOG BARRIER */ +/** + @brief Log2 tree barrier + @param[in] step Step of the logarithmic tree (must be set to 2) + @param[in] core_id ID of the core arriving at the barrier + @return none +*/ void mempool_log_barrier(uint32_t step, uint32_t core_id) { uint32_t idx = (step * (core_id / step)) * 4; uint32_t next_step, previous_step; @@ -95,9 +118,18 @@ void mempool_log_barrier(uint32_t step, uint32_t core_id) { mempool_wfi(); } +/** + @brief Tree barrier with any radix. In each step a central counter + barrier is used. + @param[in] radix log2(barrier radix), e.g. radix 2 -> 1 + @param[in] core_id ID of the core arriving at the barrier + @return none +*/ void mempool_anyradixlog_barrier(uint32_t radix, uint32_t core_id) { uint32_t num_cores = mempool_get_core_count(); - uint32_t first_step = (LOG2_NUM_CORES % radix) == 0 ? (1U << radix) : 1U << (LOG2_NUM_CORES % radix); + uint32_t first_step = (LOG2_NUM_CORES % radix) == 0 + ? 
(1U << radix) + : 1U << (LOG2_NUM_CORES % radix); uint32_t step = 0, previous_step = 0; // At first step you take care of the remainder uint32_t idx = (first_step * (core_id / first_step)) * 4; @@ -131,6 +163,13 @@ void mempool_anyradixlog_barrier(uint32_t radix, uint32_t core_id) { mempool_wfi(); } +/** + @brief Central counter barrier on a subset of cores + log2 tree + barrier + @param[in] step Number of cores in central counter barrier + @param[in] core_id ID of the core arriving at the barrier + @return none +*/ void mempool_linlog_barrier(uint32_t step, uint32_t core_id) { uint32_t idx = (step * (core_id / step)) * 4; @@ -154,6 +193,13 @@ void mempool_linlog_barrier(uint32_t step, uint32_t core_id) { mempool_wfi(); } +/** + @brief Log2 tree barrier with stride and offset + @param[in] step Step of the logarithmic tree (must be set to 2) + @param[in] stride Stride between cores to wake up + @param[in] offset ID of the first core involved in the barrier + @return none +*/ void mempool_strided_log_barrier(uint32_t step, uint32_t core_id, uint32_t stride, uint32_t offset) { @@ -185,6 +231,13 @@ void mempool_strided_log_barrier(uint32_t step, uint32_t core_id, /* PARTIAL BARRIER */ +/** + @brief Log2 tree barrier on a subset of cores + @param[in] step Step of the logarithmic tree (must be set to 2) + @param[in] core_id ID of the first core involved in the barrier + @param[in] num_cores_barrier Number of cores involved in the barrier + @return none +*/ void mempool_log_partial_barrier(uint32_t step, uint32_t core_id, uint32_t num_cores_barrier) { @@ -235,6 +288,14 @@ void mempool_log_partial_barrier(uint32_t step, uint32_t core_id, } } +/** + @brief Central counter barrier on a subset of cores + @param[in] core_id ID of the core arriving at the barrier + @param[in] core_init First core involved in the barrier + @param[in] num_sleeping_cores Number of cores involved in the barrier + @param[in] memloc Location of the barrier variable + @return none +*/ void
mempool_partial_barrier(uint32_t volatile core_id, uint32_t volatile core_init, uint32_t volatile num_sleeping_cores,