Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Barrier improvements #78

Closed
wants to merge 11 commits into from
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Update `register_interface` to 0.4.3
- Updated Halide to version 15
- Move instruction cache into its own dependency
- Add Logk tree barrier and barrier with a fraction of linear and Log2 arrival
- Add registers to wake up cores with stride and offset
- Add barrier with stride and offset

### Fixed
- Fix type issue in `snitch_addr_demux`
Expand Down
33 changes: 27 additions & 6 deletions hardware/scripts/gen_benchmark_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,44 @@

def create_dataframe(directory: str):
keys = ['cycles',
'max_cycles',
'min_cycles',
'std_cycles',
'snitch_loads',
'snitch_stores',
'snitch_avg_load_latency',
'snitch_occupancy',
'total_ipc',
'snitch_issues ',
'snitch_issues',
'max_snitch_issues',
'min_snitch_issues',
'std_snitch_issues',
'stall_tot',
'max_stall_tot',
'min_stall_tot',
'std_stall_tot',
'stall_ins',
'max_stall_ins',
'min_stall_ins',
'std_stall_ins',
'stall_raw',
'max_stall_raw',
'min_stall_raw',
'std_stall_raw',
'stall_raw_lsu',
'stall_raw_acc',
'stall_lsu',
'max_stall_lsu',
'min_stall_lsu',
'std_stall_lsu',
'stall_acc',
'max_stall_acc',
'min_stall_acc',
'std_stall_acc',
'stall_wfi',
'max_stall_wfi',
'min_stall_wfi',
'std_stall_wfi',
'seq_loads_local',
'seq_loads_global',
'itl_loads_local',
Expand All @@ -48,22 +72,20 @@ def create_dataframe(directory: str):
path = os.getcwd()
df = pd.DataFrame(index=keys)
for subdir in os.listdir(path):
filename = os.path.join(subdir, 'avg.txt')
filename = os.path.join(subdir, 'max.txt')
filetext = open(filename).read()
values = []
for key in keys:
values.append(
re.findall(
r'%s\s*[+-]?([0-9]*[.]?[0-9]+)' %
r'\b%s\b\s*[+-]?([0-9]*[.]?[0-9]+)' %
(key), filetext))
df[subdir] = (np.asarray(values)).flatten()
return df


def main():

script_path = pathlib.Path(__file__).parent.absolute()

# Parse arguments
parser = argparse.ArgumentParser(
description='Extract performance data from log files')
Expand All @@ -89,7 +111,6 @@ def main():
action='store_true',
help='Set verbose'
)

args = parser.parse_args()
df = create_dataframe(args.input)
df.to_excel(os.path.join(args.output, 'table.xls'))
Expand Down
95 changes: 95 additions & 0 deletions hardware/scripts/gen_max.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/usr/bin/env python3

# Copyright 2022 ETH Zurich and University of Bologna.
# Solderpad Hardware License, Version 0.51, see LICENSE for details.
# SPDX-License-Identifier: SHL-0.51

# This script takes a set of .csv files in one of the results folders and
# generates the average and the max/min performance.
# Author: Marco Bertuletti <[email protected]>

import os
import pandas as pd
import numpy as np
import argparse
import sys

# File extension of the per-core trace tables that are summarised.
ext = '.csv'

# Columns that are dropped before any statistics are computed.
REMOVE_KEYS = ['core',
               'section',
               'start',
               'end',
               'snitch_load_latency',
               'snitch_load_region',
               'snitch_load_tile',
               'snitch_store_region',
               'snitch_store_tile']

# Counters for which max/min/std are reported in addition to the average.
SPREAD_KEYS = ('cycles',
               'snitch_issues',
               'stall_tot',
               'stall_ins',
               'stall_raw',
               'stall_lsu',
               'stall_acc',
               'stall_wfi')


def summarize_section(sectionread, keys):
    """Return the formatted statistic lines for one section.

    Args:
        sectionread: DataFrame holding the rows of a single section.
        keys: Iterable of column names to summarise.

    Returns:
        A list of ``"%-30s %4.4f"`` formatted strings: one average per key,
        followed by ``max_``/``min_``/``std_`` lines for keys listed in
        ``SPREAD_KEYS``. Columns that cannot be averaged are skipped.
    """
    lines = []
    for key in keys:
        try:
            # Missing samples count as zero, as in the averaged report.
            column = sectionread[key].replace(np.nan, 0).to_numpy()
            stats = [(key, np.average(column))]
            if key in SPREAD_KEYS:
                stats.append(('max_' + key, np.max(column)))
                stats.append(('min_' + key, np.min(column)))
                stats.append(('std_' + key, np.std(column)))
        except (TypeError, ValueError):
            # Key could not be averaged (e.g. non-numeric column)
            continue
        lines.extend("%-30s %4.4f" % stat for stat in stats)
    return lines


def write_report(csv_path, out_path):
    """Summarise the trace table at ``csv_path`` into ``out_path``.

    Args:
        csv_path: Path of the CSV file to read.
        out_path: Path of the text report to (over)write.

    Raises:
        KeyError: If one of ``REMOVE_KEYS`` is not a column of the CSV.
    """
    csvread = pd.read_csv(csv_path)
    # The column selection does not depend on the section: compute it once.
    keys = csvread.columns.drop(REMOVE_KEYS, errors='raise')

    # Write to the file directly instead of swapping sys.stdout, so that an
    # exception can neither leak the handle nor leave stdout redirected.
    with open(out_path, 'w') as f:
        print("\n", file=f)
        print("*******************************", file=f)
        print("** AVERAGE PERFORMANCE **", file=f)
        print("*******************************", file=f)
        print("", file=f)

        # Sorted so that the report layout is reproducible between runs.
        for section in sorted(csvread['section'].unique()):
            print("Section %d:\n" % section, file=f)
            sectionread = csvread.loc[csvread['section'] == section]
            for line in summarize_section(sectionread, keys):
                print(line, file=f)


def main():
    """Write a ``max.txt`` report into every sub-folder of ``--folder``.

    Returns:
        0 on completion, used as the process exit status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--folder',
        '-f',
        required=True,
        help='Name of the results folder with traces to be averaged.'
    )
    args = parser.parse_args()

    # Absolute paths are used throughout instead of os.chdir(), so a relative
    # --folder keeps working while the sub-folders are visited.
    path = os.path.abspath(args.folder)
    print(path)

    for subdir in sorted(os.listdir(path)):
        subdir_path = os.path.join(path, subdir)
        if not os.path.isdir(subdir_path):
            # Stray files next to the result folders are not traces.
            continue
        print(subdir_path)

        csv_files = sorted(
            name for name in os.listdir(subdir_path) if name.endswith(ext))
        if not csv_files:
            print("No %s file found in %s, skipping." % (ext, subdir_path))
            continue

        # A single report is produced per sub-folder; when several tables
        # are present the last one (in sorted order) is used.
        write_report(os.path.join(subdir_path, csv_files[-1]),
                     os.path.join(subdir_path, 'max.txt'))
    return 0


if __name__ == '__main__':
    sys.exit(main())
56 changes: 38 additions & 18 deletions hardware/src/ctrl_registers.sv
Original file line number Diff line number Diff line change
Expand Up @@ -49,21 +49,23 @@ module ctrl_registers
// [3 :0 ]:eoc_reg (rw)
// [7 :4 ]:wake_up_reg (rw)
// [11:8 ]:wake_up_group_reg (rw)
// [15:12]:tcdm_start_adress_reg (ro)
// [19:16]:tcdm_end_address_reg (ro)
// [23:20]:nr_cores_address_reg (ro)
// [27:24]:ro_cache_enable (rw)
// [31:28]:ro_cache_flush (rw)
// [35:32]:ro_cache_start_0 (rw)
// [39:36]:ro_cache_end_0 (rw)
// [43:40]:ro_cache_start_1 (rw)
// [47:44]:ro_cache_end_1 (rw)
// [51:48]:ro_cache_start_2 (rw)
// [55:52]:ro_cache_end_2 (rw)
// [59:56]:ro_cache_start_3 (rw)
// [63:60]:ro_cache_end_3 (rw)

// [95:64]:wake_up_tile[7:0] (rw)
// [15:12]:wake_up_stride_reg (rw)
// [19:16]:wake_up_offset_reg (rw)
// [23:20]:tcdm_start_address_reg (ro)
// [27:24]:tcdm_end_address_reg (ro)
// [31:28]:nr_cores_address_reg (ro)
// [35:32]:ro_cache_enable (rw)
// [39:36]:ro_cache_flush (rw)
// [43:40]:ro_cache_start_0 (rw)
// [47:44]:ro_cache_end_0 (rw)
// [51:48]:ro_cache_start_1 (rw)
// [55:52]:ro_cache_end_1 (rw)
// [59:56]:ro_cache_start_2 (rw)
// [63:60]:ro_cache_end_2 (rw)
// [67:64]:ro_cache_start_3 (rw)
// [71:68]:ro_cache_end_3 (rw)

// [103:72]:wake_up_tile[7:0] (rw)

localparam logic [MAX_NumGroups*DataWidth-1:0] RegRstVal_TileWakeUp = '{MAX_NumGroups*DataWidth{1'b0}};
localparam logic [NumRegs-MAX_NumGroups-1:0][DataWidth-1:0] RegRstVal = '{
Expand All @@ -81,6 +83,8 @@ module ctrl_registers
TCDMBaseAddr + TCDMSize,
TCDMBaseAddr,
{DataWidth{1'b0}},
32'b1,
{DataWidth{1'b0}},
{DataWidth{1'b0}},
{DataWidth{1'b0}}
};
Expand All @@ -102,6 +106,8 @@ module ctrl_registers
ReadOnlyReg,
ReadWriteReg,
ReadWriteReg,
ReadWriteReg,
ReadWriteReg,
ReadWriteReg
};

Expand All @@ -111,6 +117,8 @@ module ctrl_registers
logic [DataWidth-1:0] eoc;
logic [DataWidth-1:0] wake_up;
logic [DataWidth-1:0] wake_up_group;
logic [DataWidth-1:0] wake_up_stride;
logic [DataWidth-1:0] wake_up_offset;
logic [DataWidth-1:0] tcdm_start_address;
logic [DataWidth-1:0] tcdm_end_address;
logic [DataWidth-1:0] num_cores;
Expand All @@ -125,6 +133,7 @@ module ctrl_registers
logic [DataWidth-1:0] ro_cache_start_3;
logic [DataWidth-1:0] ro_cache_end_3;
logic [MAX_NumGroups*DataWidth-1:0] wake_up_tile;
logic [NumCores-1:0] wake_up_mask;

logic [RegNumBytes-1:0] wr_active_d;
logic [RegNumBytes-1:0] wr_active_q;
Expand Down Expand Up @@ -152,7 +161,8 @@ module ctrl_registers
ro_cache_end_1, ro_cache_start_1,
ro_cache_end_0, ro_cache_start_0,
ro_cache_flush, ro_cache_enable,
num_cores, tcdm_end_address, tcdm_start_address, wake_up_group, wake_up, eoc })
num_cores, tcdm_end_address, tcdm_start_address,
wake_up_offset, wake_up_stride, wake_up_group, wake_up, eoc })
);

/***************
Expand All @@ -177,12 +187,20 @@ module ctrl_registers

always_comb begin
wake_up_o = '0;
wake_up_mask = '0;

// create mask for wake_up with stride and offset
for(int i = wake_up_offset; i < NumCores; i = i + wake_up_stride) begin
wake_up_mask[i] = 1;
end

// converts 32 bit wake up to 256 bit
if (wr_active_q[7:4]) begin
if (wake_up < NumCores) begin
wake_up_o = 1 << wake_up;
end else if (wake_up == {DataWidth{1'b1}}) begin
wake_up_o = {NumCores{1'b1}};
wake_up_o = wake_up_o & wake_up_mask;
end
end
// converts 32 bit group wake up mask to 256 bit core wake up mask
Expand All @@ -191,19 +209,21 @@ module ctrl_registers
for(int i = 0; i < NumGroups; i = i + 1) begin
wake_up_o[NumCoresPerGroup * i +: NumCoresPerGroup] = {NumCoresPerGroup{wake_up_group[i]}};
end
wake_up_o = wake_up_o & wake_up_mask;
end else if (wake_up_group == {DataWidth{1'b1}}) begin
wake_up_o = {NumCores{1'b1}};
wake_up_o = wake_up_o & wake_up_mask;
end
end

// converts 32 bit tile wake up mask to 256 bit core wake up mask
for(int i_g = 0; i_g < NumGroups; i_g = i_g + 1) begin

if (wr_active_q[64 + 4 * i_g +: 4]) begin
if (wr_active_q[72 + 4 * i_g +: 4]) begin
if (wake_up_tile[i_g * DataWidth +: DataWidth] <= {NumTilesPerGroup{1'b1}}) begin
for (int i = 0; i < NumTilesPerGroup; i = i + 1) begin
wake_up_o[NumCoresPerGroup * i_g + NumCoresPerTile * i +: NumCoresPerTile] = {NumCoresPerTile{wake_up_tile[i_g * DataWidth + i]}};
end
wake_up_o = wake_up_o & wake_up_mask;
end
end

Expand Down
2 changes: 1 addition & 1 deletion hardware/src/mempool_system.sv
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,7 @@ module mempool_system
);

ctrl_registers #(
.NumRegs (16 + 8 ),
.NumRegs (18 + 8 ),
.TCDMBaseAddr (TCDMBaseAddr ),
.TCDMSize (TCDMSize ),
.NumCores (NumCores ),
Expand Down
18 changes: 18 additions & 0 deletions software/data/data_barriers_test.h.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright 2022 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
\
<% def array_to_cstr(array):
    # Render `array` as a C initializer list with eight values per line.
    # Rows are joined with explicit separators instead of appending ', '
    # after every value and trimming two characters at the end: the trim
    # left a dangling comma when the length was a multiple of eight and
    # produced a bare '}' for an empty array.
    values = ['{}'.format(a) for a in array]
    rows = [', '.join(values[i:i + 8]) for i in range(0, len(values), 8)]
    return '{\n' + ', \n'.join(rows) + '}'
%> \

uint32_t core_delays[${num_cores}] = ${array_to_cstr(delays)};
Loading
Loading