diff --git a/frontends/systolic-lang/gen-systolic.py b/frontends/systolic-lang/gen-systolic.py index 99245f1d79..dfe992b6e6 100755 --- a/frontends/systolic-lang/gen-systolic.py +++ b/frontends/systolic-lang/gen-systolic.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 import calyx.builder as cb +from systolic_arg_parser import SystolicConfiguration +from calyx import py_ast +from calyx.utils import bits_needed from gen_array_component import ( create_systolic_array, BITWIDTH, @@ -10,14 +13,22 @@ default_post_op, relu_post_op, leaky_relu_post_op, + relu_dynamic_post_op, OUT_MEM, DEFAULT_POST_OP, RELU_POST_OP, LEAKY_RELU_POST_OP, + RELU_DYNAMIC_POST_OP, ) -from systolic_arg_parser import SystolicConfiguration, SUPPORTED_POST_OPS -from calyx import py_ast -from calyx.utils import bits_needed + +# Dict that maps command line arguments (e.g., "leaky-relu") to component names +# and function that creates them. +POST_OP_DICT = { + None: (DEFAULT_POST_OP, default_post_op), + "leaky-relu": (LEAKY_RELU_POST_OP, leaky_relu_post_op), + "relu": (RELU_POST_OP, relu_post_op), + "relu-dynamic": (RELU_DYNAMIC_POST_OP, relu_dynamic_post_op), +} def create_mem_connections( @@ -90,7 +101,7 @@ def build_main(prog, config: SystolicConfiguration, post_op_component_name): # Connect outout memories to post_op, and systolic_array_output to # post_op inputs. for i in range(left_length): - # connect output memory to post op + # Connect output memory to post op. want to write to this memory. 
connections += create_mem_connections( main, post_op, OUT_MEM + f"_{i}", top_length, read_mem=False ) @@ -137,19 +148,16 @@ def build_main(prog, config: SystolicConfiguration, post_op_component_name): # Building the main component prog = cb.Builder() create_systolic_array(prog, systolic_config) - if systolic_config.post_op == "leaky-relu": - leaky_relu_post_op(prog, config=systolic_config) - post_op_component_name = LEAKY_RELU_POST_OP - elif systolic_config.post_op == "relu": - relu_post_op(prog, config=systolic_config) - post_op_component_name = RELU_POST_OP - elif systolic_config.post_op is None: - default_post_op(prog, config=systolic_config) - post_op_component_name = DEFAULT_POST_OP + if systolic_config.post_op in POST_OP_DICT.keys(): + post_op_component_name, component_building_func = POST_OP_DICT[ + systolic_config.post_op + ] + component_building_func(prog, config=systolic_config) else: raise ValueError( f"{systolic_config.post_op} not supported as a post op. \ - Supported post ops are {SUPPORTED_POST_OPS}" + Supported post ops are (None means you pass no argument for -p) \ + {POST_OP_DICT.keys()}" ) build_main( diff --git a/frontends/systolic-lang/gen_post_op.py b/frontends/systolic-lang/gen_post_op.py index b2ae0e5581..206c393320 100644 --- a/frontends/systolic-lang/gen_post_op.py +++ b/frontends/systolic-lang/gen_post_op.py @@ -15,6 +15,7 @@ DEFAULT_POST_OP = "default_post_op" RELU_POST_OP = "relu_post_op" LEAKY_RELU_POST_OP = "leaky_relu_post_op" +RELU_DYNAMIC_POST_OP = "relu_dynamic_post_op" COND_REG = "cond_reg" WRITE_DONE_COND = "write_done_cond" @@ -152,23 +153,31 @@ def relu_post_op(prog: cb.Builder, config: SystolicConfiguration): imm_write_mem_post_op(prog=prog, config=config, perform_relu=True) -def leaky_relu_comp(prog: cb.Builder, idx_width: int): +def add_dynamic_op_params(comp: cb.ComponentBuilder, idx_width: int): + """ + Adds necessary parameters for dynamic ops, including: + 1) Input value + 2) Parameters to write the result of the op to 
memory. + 3) Input index (for the memory to write to) + """ + comp.input("value", BITWIDTH) + comp.input("idx", idx_width) + cb.add_write_mem_params(comp, OUT_MEM, data_width=BITWIDTH, addr_width=idx_width) + + +def leaky_relu_comp(prog: cb.Builder, idx_width: int) -> cb.ComponentBuilder: """ Creates a dynamic, non-pipelined, leaky relu component. This is the component that actually performs the leaky relu computation on a given output. """ - comp = prog.component(name="leaky_relu") - comp.input("value", BITWIDTH) - # Takes a memory and register (i.e., arguments that essentially act as ref cells) - cb.add_write_mem_params(comp, OUT_MEM, data_width=BITWIDTH, addr_width=idx_width) - cb.add_register_params(comp, "idx_reg", idx_width) + comp = prog.component(name="leaky_relu_op") + add_dynamic_op_params(comp, idx_width) this = comp.this() fp_mult = comp.fp_sop("fp_mult", "mult_pipe", BITWIDTH, INTWIDTH, FRACWIDTH) lt = comp.fp_sop("val_lt", "lt", BITWIDTH, INTWIDTH, FRACWIDTH) - incr_idx = comp.add(idx_width, "incr_idx") write_mem = comp.wire("should_write_mem", 1) with comp.continuous: @@ -188,15 +197,9 @@ def leaky_relu_comp(prog: cb.Builder, idx_width: int): fp_mult.right = this.value fp_mult.go = ~(write_mem.out) @ 1 - # Increment idx_reg during the cycle that we write to memory. - incr_idx.left = this.idx_reg_out - incr_idx.right = 1 - this.idx_reg_in = write_mem.out @ incr_idx.out - this.idx_reg_write_en = write_mem.out @ 1 - # Write to memory. 
this.out_mem_write_en = write_mem.out @ 1 - this.out_mem_addr0 = this.idx_reg_out + this.out_mem_addr0 = this.idx # Write value if this.value >= 0 # Write mult.out if this.value < 0 this.out_mem_write_data = ~lt.out @ this.value @@ -205,24 +208,68 @@ def leaky_relu_comp(prog: cb.Builder, idx_width: int): comp.control = py_ast.Enable("do_relu") + return comp + -def create_leaky_relu_done_condition(comp: cb.ComponentBuilder, num_rows: int): +def relu_dynamic_comp(prog: cb.Builder, idx_width: int): + """ + Creates a dynamic, regular RELU component. + This dynamic implementation is meant to be compared to a static + ReLU implementation in order to show the benefits of static groups and + control. + """ + comp = prog.component(name="relu_dynamic_op") + add_dynamic_op_params(comp, idx_width) + + this = comp.this() + + lt = comp.fp_sop("val_lt", "lt", BITWIDTH, INTWIDTH, FRACWIDTH) + + with comp.continuous: + # lt holds whether this.value < 0 + lt.left = this.value + lt.right = 0 + + with comp.group("do_relu") as g: + # Write to memory. + this.out_mem_write_en = 1 + this.out_mem_addr0 = this.idx + # Write value if this.value >= 0 + # Write 0 if this.value < 0 + this.out_mem_write_data = ~lt.out @ this.value + this.out_mem_write_data = lt.out @ 0 + + # It takes one cycle to write to g + g.done = this.out_mem_done + + comp.control = py_ast.Enable("do_relu") + + return comp + + +def generate_dynamic_post_op_done(comp: cb.ComponentBuilder, num_rows: int): """ The done condition for leaky relu components is triggered once all of the leaky relu operations have finished. 
""" this = comp.this() # Check if all relu operations have finished for each row - guard = comp.get_cell("relu_finished_wire_r0").out + guard = comp.get_cell("op_finished_wire_r0").out for r in range(1, num_rows): - guard = guard & comp.get_cell(f"relu_finished_wire_r{r}").out - all_relu_finished_wire = comp.wire("all_relu_finished_wire", 1) + guard = guard & comp.get_cell(f"op_finished_wire_r{r}").out + all_row_finished_wire = comp.wire("all_row_finished_wire", 1) with comp.static_group(WRITE_DONE_COND, 1): - all_relu_finished_wire.in_ = guard @ 1 - this.computation_done = all_relu_finished_wire.out @ 1 + all_row_finished_wire.in_ = guard @ 1 + this.computation_done = all_row_finished_wire.out @ 1 -def create_leaky_relu_groups(comp: cb.ComponentBuilder, row, num_cols, addr_width): +def create_dynamic_post_op_groups( + comp: cb.ComponentBuilder, + row: int, + num_cols: int, + addr_width: int, + op_component: cb.ComponentBuilder, +): """ Creates the groups for the leaky relu post op, i.e., the post-op that coordinates the execution of the leaky relu component. @@ -230,7 +277,7 @@ def create_leaky_relu_groups(comp: cb.ComponentBuilder, row, num_cols, addr_widt def store_output_vals(comp: cb.ComponentBuilder, row, num_cols, addr_width): """ - Helper function that looks at the systolic array output signsl (e.g., + Helper function that looks at the systolic array output signals (e.g., `r0_valid`, `r0_value`, etc.) and creates signals that tells us when: a) each row is ready for the leaky relu operations to start and b) the output systolic array values (we need them in registers bc the systolic @@ -249,7 +296,7 @@ def store_output_vals(comp: cb.ComponentBuilder, row, num_cols, addr_width): value_ready_signal = valid_signal & ( idx_signal == cb.ExprBuilder(py_ast.ConstantPort(addr_width, col)) ) - with comp.static_group(f"r{row}_c{col}_value_group", 1): + with comp.continuous: # Wire to detect and hold when the row is first valid. 
Once # it is valid, we can safely start our relu operations. row_ready_reg.in_ = valid_signal @ 1 @@ -265,90 +312,113 @@ def store_output_vals(comp: cb.ComponentBuilder, row, num_cols, addr_width): reg_value.write_en = val_ready.out @ 1 # Helper function adds assignment wire.in = reg.out == col ? pe_{row}_{col}_out. - def build_assignment(group: cb.GroupBuilder, wire, register, output_val): - group.asgn( + def build_assignment(wire, register, output_val): + comp.continuous.asgn( wire.port("in"), output_val.out, register.port("out") == cb.ExprBuilder(py_ast.ConstantPort(BITWIDTH, col)), ) - group = comp.static_group(f"r{row}_helper", 1) - # Current value we are performing relu on. cur_val = comp.wire(f"r{row}_cur_val", BITWIDTH) # Current idx within the row (i.e., column) for the value we are performing relu on. idx_reg = comp.reg(f"r{row}_cur_idx", addr_width) # Handling logic to hold the systolic array's output values so they're available - # for moer than one cycle. + # for more than one cycle. store_output_vals(comp, row, num_cols, addr_width) for col in range(num_cols): output_val = comp.get_cell(f"r{row}_c{col}_val_wire") # Assigning to cur_val wire so that we always have the current value of the # row based on idx_reg. - build_assignment(group, cur_val, idx_reg, output_val) + build_assignment(cur_val, idx_reg, output_val) # Instantiate an instance of a leaky_relu component - relu_instance = comp.cell(f"leaky_relu_r{row}", py_ast.CompInst("leaky_relu", [])) + op_instance = comp.cell( + f"{op_component.component.name}_r{row}", + py_ast.CompInst(op_component.component.name, []), + ) # Wire that tells us we are finished with relu operation for this row. 
- relu_finished_wire = comp.wire(f"relu_finished_wire_r{row}", 1) + row_finished_wire = comp.wire(f"op_finished_wire_r{row}", 1) row_ready_wire = comp.get_cell(f"r{row}_ready_wire") + incr_idx = comp.add(bits_needed(num_cols), f"incr_idx_r{row}") # Need to pass this component's memory ports another layer down to # the leaky_relu cell. this_relu_io_ports = cb.build_connections( cell1=comp.this(), - cell2=relu_instance, + cell2=op_instance, root1=OUT_MEM + f"_{row}_", root2=OUT_MEM + "_", forward_ports=["addr0", "write_data", "write_en"], reverse_ports=["done"], ) - # Building connections between relu and idx_reg - relu_idx_io_ports = cb.build_connections( - cell1=idx_reg, - cell2=relu_instance, - root1="", - root2="idx_reg_", - forward_ports=["write_en", "in"], - reverse_ports=["out", "done"], - ) idx_limit_reached = idx_reg.out == cb.ExprBuilder( py_ast.ConstantPort(BITWIDTH, num_cols) ) with comp.static_group(f"execute_relu_r{row}", 1) as g: for i, o in this_relu_io_ports: g.asgn(i, o) - for i, o in relu_idx_io_ports: - g.asgn(i, o) # Handle incrementing the idx_reg. 
- relu_instance.go = ( - row_ready_wire.out & (~relu_finished_wire.out) + incr_idx.left = idx_reg.out + incr_idx.right = 1 + idx_reg.in_ = incr_idx.out + # Increment idx once the op is done executing + idx_reg.write_en = op_instance.done @ 1 + + op_instance.go = ( + row_ready_wire.out & (~row_finished_wire.out) & (~op_instance.done) ) @ cb.ExprBuilder(py_ast.ConstantPort(1, 1)) # input ports for relu_instance - relu_instance.value = cur_val.out - relu_finished_wire.in_ = idx_limit_reached @ 1 + op_instance.value = cur_val.out + op_instance.idx = idx_reg.out + row_finished_wire.in_ = idx_limit_reached @ 1 -def leaky_relu_post_op(prog: cb.Builder, config: SystolicConfiguration): +def dynamic_post_op( + prog: cb.Builder, + config: SystolicConfiguration, + post_op_component_name: str, + op_component: cb.ComponentBuilder, +): """ - Adds a dynamic leaky relu post op to `prog` + Adds a dynamic post op that handles the coordination so that + `op_component` (which can be dynamic) gets executed dynamically on each + systolic array output. """ num_rows, num_cols = config.get_output_dimensions() idx_width = bits_needed(num_cols) # Create a leaky relu component. - leaky_relu_comp(prog, idx_width) - comp = prog.component(name=LEAKY_RELU_POST_OP) + comp = prog.component(name=post_op_component_name) add_post_op_params(comp, num_rows, idx_width) for r in range(num_rows): - create_leaky_relu_groups(comp, r, num_cols, idx_width) - create_leaky_relu_done_condition(comp, num_rows) + create_dynamic_post_op_groups(comp, r, num_cols, idx_width, op_component) + generate_dynamic_post_op_done(comp, num_rows) # all_groups go in one big static par. 
all_groups = [py_ast.Enable(WRITE_DONE_COND)] for r in range(num_rows): - all_groups.append(py_ast.Enable(f"r{r}_helper")) all_groups.append(py_ast.Enable(f"execute_relu_r{r}")) - for c in range(num_cols): - all_groups.append(py_ast.Enable(f"r{r}_c{c}_value_group")) comp.control = py_ast.StaticParComp(all_groups) + + +def leaky_relu_post_op(prog: cb.Builder, config: SystolicConfiguration): + _, num_cols = config.get_output_dimensions() + leaky_relu_op_comp = leaky_relu_comp(prog, idx_width=bits_needed(num_cols)) + dynamic_post_op( + prog=prog, + config=config, + post_op_component_name=LEAKY_RELU_POST_OP, + op_component=leaky_relu_op_comp, + ) + + +def relu_dynamic_post_op(prog: cb.Builder, config: SystolicConfiguration): + _, num_cols = config.get_output_dimensions() + relu_dynamic_op_comp = relu_dynamic_comp(prog, idx_width=bits_needed(num_cols)) + dynamic_post_op( + prog=prog, + config=config, + post_op_component_name=RELU_DYNAMIC_POST_OP, + op_component=relu_dynamic_op_comp, + ) diff --git a/frontends/systolic-lang/systolic_arg_parser.py b/frontends/systolic-lang/systolic_arg_parser.py index e787e7b632..c2461e2f19 100644 --- a/frontends/systolic-lang/systolic_arg_parser.py +++ b/frontends/systolic-lang/systolic_arg_parser.py @@ -1,7 +1,7 @@ import argparse import json -SUPPORTED_POST_OPS = ["leaky-relu", "relu"] +SUPPORTED_POST_OPS = ["leaky-relu", "relu", "relu-dynamic"] class SystolicConfiguration: diff --git a/runt.toml b/runt.toml index 50b15d8e03..258ccb7602 100644 --- a/runt.toml +++ b/runt.toml @@ -267,6 +267,7 @@ paths = [ "tests/correctness/systolic/output/*.systolic", "tests/correctness/systolic/leaky-relu/*.systolic", "tests/correctness/systolic/relu/*.systolic", + "tests/correctness/systolic/relu-dynamic/*.systolic", ] cmd = """ fud e --from systolic --to dat \ diff --git a/tests/correctness/systolic/leaky-relu/array-2-3-4.expect b/tests/correctness/systolic/leaky-relu/array-2-3-4.expect index b2e988ea2d..a9bd75bc9e 100644 --- 
a/tests/correctness/systolic/leaky-relu/array-2-3-4.expect +++ b/tests/correctness/systolic/leaky-relu/array-2-3-4.expect @@ -1,5 +1,5 @@ { - "cycles": 18, + "cycles": 22, "memories": { "l0": [ "-1.7772064208984375", diff --git a/tests/correctness/systolic/leaky-relu/array-8.expect b/tests/correctness/systolic/leaky-relu/array-8.expect index 1d8c768d2e..b7473d7cad 100644 --- a/tests/correctness/systolic/leaky-relu/array-8.expect +++ b/tests/correctness/systolic/leaky-relu/array-8.expect @@ -1,5 +1,5 @@ { - "cycles": 39, + "cycles": 47, "memories": { "l0": [ "3.5089263916015625", diff --git a/tests/correctness/systolic/relu-dynamic/array-2-3-4.expect b/tests/correctness/systolic/relu-dynamic/array-2-3-4.expect new file mode 100644 index 0000000000..f642045e3b --- /dev/null +++ b/tests/correctness/systolic/relu-dynamic/array-2-3-4.expect @@ -0,0 +1,47 @@ +{ + "cycles": 19, + "memories": { + "l0": [ + "3.349822998046875", + "3.6539764404296875", + "0.0511016845703125" + ], + "l1": [ + "-0.1660003662109375", + "3.43817138671875", + "0.1073760986328125" + ], + "out_mem_0": [ + "12.569793701171875", + "0", + "1.2611541748046875", + "8.012969970703125" + ], + "out_mem_1": [ + "0", + "0", + "0.964019775390625", + "1.9193878173828125" + ], + "t0": [ + "4.0580596923828125", + "-0.2537078857421875", + "-1.896697998046875" + ], + "t1": [ + "-1.2882843017578125", + "-3.0372772216796875", + "3.0884552001953125" + ], + "t2": [ + "0.073028564453125", + "0.2735595703125", + "0.3317718505859375" + ], + "t3": [ + "1.702178955078125", + "0.6259765625", + "0.4635009765625" + ] + } +} diff --git a/tests/correctness/systolic/relu-dynamic/array-2-3-4.systolic b/tests/correctness/systolic/relu-dynamic/array-2-3-4.systolic new file mode 100644 index 0000000000..c738489e15 --- /dev/null +++ b/tests/correctness/systolic/relu-dynamic/array-2-3-4.systolic @@ -0,0 +1,7 @@ +{ + "top_length": 4, + "top_depth": 3, + "left_length": 2, + "left_depth": 3, + "post_op": "relu-dynamic" +} \ No newline at 
end of file diff --git a/tests/correctness/systolic/relu-dynamic/array-2-3-4.systolic.data b/tests/correctness/systolic/relu-dynamic/array-2-3-4.systolic.data new file mode 100644 index 0000000000..823b904b4e --- /dev/null +++ b/tests/correctness/systolic/relu-dynamic/array-2-3-4.systolic.data @@ -0,0 +1,108 @@ +{ + "l0": { + "data": [ + 3.349827766418457, + 3.6539793014526367, + 0.05110502243041992 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "l1": { + "data": [ + -0.16600513458251953, + 3.4381656646728516, + 0.1073751449584961 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "out_mem_0": { + "data": [ + 0.0, + 0.0, + 0.0, + 0.0 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "out_mem_1": { + "data": [ + 0.0, + 0.0, + 0.0, + 0.0 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "t0": { + "data": [ + 4.058065414428711, + -0.2537107467651367, + -1.8967020511627197 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "t1": { + "data": [ + -1.2882888317108154, + -3.0372798442840576, + 3.0884532928466797 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "t2": { + "data": [ + 0.07303619384765625, + 0.27355289459228516, + 0.3317680358886719 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "t3": { + "data": [ + 1.702174186706543, + 0.6259689331054688, + 0.4635009765625 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + } +} \ No newline at end of file diff --git a/tests/correctness/systolic/relu-dynamic/array-8.expect 
b/tests/correctness/systolic/relu-dynamic/array-8.expect new file mode 100644 index 0000000000..d8627be637 --- /dev/null +++ b/tests/correctness/systolic/relu-dynamic/array-8.expect @@ -0,0 +1,245 @@ +{ + "cycles": 38, + "memories": { + "l0": [ + "-1.49566650390625", + "-0.9142608642578125", + "-2.6135711669921875", + "1.4980010986328125", + "-3.4720001220703125", + "3.4763031005859375", + "-0.016571044921875", + "-2.989044189453125" + ], + "l1": [ + "-1.33184814453125", + "2.9468841552734375", + "1.7263946533203125", + "-1.7017974853515625", + "-1.13458251953125", + "-4.711273193359375", + "-0.290771484375", + "-4.2563934326171875" + ], + "l2": [ + "1.1874237060546875", + "-3.408599853515625", + "-1.801544189453125", + "1.1929931640625", + "-1.8954010009765625", + "-3.1168060302734375", + "0.3030242919921875", + "-1.095611572265625" + ], + "l3": [ + "-2.2605743408203125", + "-4.778472900390625", + "4.631988525390625", + "-2.366485595703125", + "2.2865142822265625", + "-1.9458465576171875", + "2.51043701171875", + "-0.420989990234375" + ], + "l4": [ + "2.081451416015625", + "-2.0117034912109375", + "-3.0953369140625", + "0.8167266845703125", + "-0.9581451416015625", + "-2.253204345703125", + "3.2512359619140625", + "0.8149871826171875" + ], + "l5": [ + "4.9659271240234375", + "-4.0330352783203125", + "0.3494110107421875", + "0.9843292236328125", + "-1.1136932373046875", + "-0.161285400390625", + "3.5453338623046875", + "-4.283843994140625" + ], + "l6": [ + "3.9283294677734375", + "1.32122802734375", + "-0.6650543212890625", + "4.8896026611328125", + "2.450042724609375", + "2.7947235107421875", + "1.7928314208984375", + "-1.518035888671875" + ], + "l7": [ + "-4.6738739013671875", + "-4.4643096923828125", + "0.1487274169921875", + "-3.4300994873046875", + "2.415863037109375", + "-3.8175506591796875", + "3.8909149169921875", + "-0.361236572265625" + ], + "out_mem_0": [ + "0", + "0", + "0", + "23.335174560546875", + "5.11322021484375", + "0", + "0.1280364990234375", + 
"0" + ], + "out_mem_1": [ + "0", + "1.920379638671875", + "0", + "45.25958251953125", + "32.78094482421875", + "0", + "0", + "33.4017333984375" + ], + "out_mem_2": [ + "0", + "0", + "0", + "29.5540618896484375", + "23.58514404296875", + "0", + "0.9548797607421875", + "0" + ], + "out_mem_3": [ + "21.81707763671875", + "0", + "0", + "5.942596435546875", + "19.353363037109375", + "0", + "6.6043853759765625", + "21.3936614990234375" + ], + "out_mem_4": [ + "0", + "1.3929443359375", + "0", + "16.4472808837890625", + "16.946075439453125", + "0", + "21.7616119384765625", + "0" + ], + "out_mem_5": [ + "29.9978179931640625", + "0", + "0", + "18.637359619140625", + "42.1724090576171875", + "0", + "6.4488067626953125", + "4.316436767578125" + ], + "out_mem_6": [ + "34.8578338623046875", + "18.028961181640625", + "23.9514617919921875", + "0", + "16.2915496826171875", + "2.3678436279296875", + "15.6302032470703125", + "0" + ], + "out_mem_7": [ + "0", + "0", + "0", + "27.7047271728515625", + "18.578521728515625", + "0", + "14.9741973876953125", + "10.2684478759765625" + ], + "t0": [ + "4.9021759033203125", + "-2.5044708251953125", + "4.5919189453125", + "0.601043701171875", + "2.68255615234375", + "4.476806640625", + "1.82684326171875", + "2.19659423828125" + ], + "t1": [ + "3.4096832275390625", + "4.9857330322265625", + "-3.820037841796875", + "-0.70623779296875", + "4.9615936279296875", + "-2.083038330078125", + "-2.567901611328125", + "1.8252105712890625" + ], + "t2": [ + "-0.772216796875", + "4.754638671875", + "-0.936676025390625", + "2.935699462890625", + "2.22613525390625", + "1.5690155029296875", + "-1.6681365966796875", + "0.73956298828125" + ], + "t3": [ + "-3.9584503173828125", + "0.05987548828125", + "-0.902618408203125", + "3.788360595703125", + "-3.4463958740234375", + "-4.98907470703125", + "2.5916900634765625", + "-4.97015380859375" + ], + "t4": [ + "0.08782958984375", + "-0.653778076171875", + "1.889984130859375", + "3.657745361328125", + "-1.1245880126953125", 
+ "-3.8535003662109375", + "3.775634765625", + "-4.5704193115234375" + ], + "t5": [ + "-0.3739166259765625", + "1.5081634521484375", + "0.3743133544921875", + "3.133514404296875", + "-1.101104736328125", + "0.3949737548828125", + "-3.9834136962890625", + "2.9597930908203125" + ], + "t6": [ + "1.1507720947265625", + "-4.7238922119140625", + "-3.618682861328125", + "-0.121856689453125", + "3.2436065673828125", + "3.9204864501953125", + "2.1115570068359375", + "4.709442138671875" + ], + "t7": [ + "0.51519775390625", + "2.701629638671875", + "4.22308349609375", + "-3.078399658203125", + "0.3193511962890625", + "-1.3680877685546875", + "1.766815185546875", + "-1.8860321044921875" + ] + } +} diff --git a/tests/correctness/systolic/relu-dynamic/array-8.systolic b/tests/correctness/systolic/relu-dynamic/array-8.systolic new file mode 100644 index 0000000000..ea8c555b83 --- /dev/null +++ b/tests/correctness/systolic/relu-dynamic/array-8.systolic @@ -0,0 +1,7 @@ +{ + "top_length": 8, + "top_depth": 8, + "left_length": 8, + "left_depth": 8, + "post_op": "relu-dynamic" +} \ No newline at end of file diff --git a/tests/correctness/systolic/relu-dynamic/array-8.systolic.data b/tests/correctness/systolic/relu-dynamic/array-8.systolic.data new file mode 100644 index 0000000000..34b8677274 --- /dev/null +++ b/tests/correctness/systolic/relu-dynamic/array-8.systolic.data @@ -0,0 +1,434 @@ +{ + "l0": { + "data": [ + -1.4956629276275635, + -0.9142565727233887, + -2.6135730743408203, + 1.4980006217956543, + -3.472006320953369, + 3.476308822631836, + -0.016571044921875, + -2.989037036895752 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "l1": { + "data": [ + -1.3318490982055664, + 2.9468870162963867, + 1.726388931274414, + -1.7017900943756104, + -1.1345791816711426, + -4.711271286010742, + -0.2907705307006836, + -4.256398677825928 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": 
"fixed_point", + "width": 32 + } + }, + "l2": { + "data": [ + 1.187424659729004, + -3.408595323562622, + -1.801539659500122, + 1.1929893493652344, + -1.8953990936279297, + -3.116804361343384, + 0.3030214309692383, + -1.0956144332885742 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "l3": { + "data": [ + -2.2605812549591064, + -4.778478145599365, + 4.631991386413574, + -2.3664891719818115, + 2.2865095138549805, + -1.9458460807800293, + 2.51043701171875, + -0.42099571228027344 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "l4": { + "data": [ + 2.0814504623413086, + -2.011702060699463, + -3.095329999923706, + 0.8167314529418945, + -0.9581432342529297, + -2.253211736679077, + 3.2512292861938477, + 0.8149852752685547 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "l5": { + "data": [ + 4.965932846069336, + -4.033029079437256, + 0.34941768646240234, + 0.9843292236328125, + -1.1136901378631592, + -0.16128921508789062, + 3.5453338623046875, + -4.283846855163574 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "l6": { + "data": [ + 3.9283323287963867, + 1.3212251663208008, + -0.6650471687316895, + 4.8896074295043945, + 2.45003604888916, + 2.7947306632995605, + 1.7928242683410645, + -1.5180349349975586 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "l7": { + "data": [ + -4.673877716064453, + -4.464309215545654, + 0.14873123168945312, + -3.4300994873046875, + 2.415858268737793, + -3.8175439834594727, + 3.890915870666504, + -0.36124229431152344 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "out_mem_0": { + "data": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 
0.0, + 0.0 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "out_mem_1": { + "data": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "out_mem_2": { + "data": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "out_mem_3": { + "data": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "out_mem_4": { + "data": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "out_mem_5": { + "data": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "out_mem_6": { + "data": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "out_mem_7": { + "data": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "t0": { + "data": [ + 4.902173042297363, + -2.5044643878936768, + 4.591917037963867, + 0.601048469543457, + 2.6825523376464844, + 4.476808547973633, + 1.8268499374389648, + 2.1965885162353516 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "t1": { + "data": [ + 3.4096832275390625, + 4.9857330322265625, + -3.8200438022613525, + -0.706233024597168, + 4.961596488952637, + -2.0830368995666504, + 
-2.5678956508636475, + 1.825209617614746 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "t2": { + "data": [ + -0.7722148895263672, + 4.754638671875, + -0.9366703033447266, + 2.935696601867676, + 2.2261343002319336, + 1.569009780883789, + -1.6681408882141113, + 0.7395687103271484 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "t3": { + "data": [ + -3.9584505558013916, + 0.05988121032714844, + -0.9026155471801758, + 3.7883644104003906, + -3.446394205093384, + -4.989075660705566, + 2.5916852951049805, + -4.970152378082275 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "t4": { + "data": [ + 0.08782482147216797, + -0.6537842750549316, + 1.8899774551391602, + 3.6577377319335938, + -1.1245930194854736, + -3.853501081466675, + 3.7756290435791016, + -4.57042121887207 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "t5": { + "data": [ + -0.3739175796508789, + 1.5081634521484375, + 0.3743171691894531, + 3.1335086822509766, + -1.1011040210723877, + 0.39496898651123047, + -3.983415365219116, + 2.9597973823547363 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "t6": { + "data": [ + 1.1507759094238281, + -4.7238874435424805, + -3.618685007095337, + -0.12185335159301758, + 3.2436037063598633, + 3.9204845428466797, + 2.1115517616271973, + 4.709440231323242 + ], + "format": { + "frac_width": 16, + "is_signed": true, + "numeric_type": "fixed_point", + "width": 32 + } + }, + "t7": { + "data": [ + 0.5151915550231934, + 2.7016282081604004, + 4.223084449768066, + -3.078404664993286, + 0.31935787200927734, + -1.3680875301361084, + 1.7668190002441406, + -1.8860268592834473 + ], + "format": { + "frac_width": 16, + "is_signed": true, + 
"numeric_type": "fixed_point", + "width": 32 + } + } +} \ No newline at end of file