Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementing Reliability for Instruction Cache (L0, L1, RO) #2

Draft
wants to merge 46 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
92d94d5
created testbench configuration
Oct 9, 2023
e439871
added parity bits and checks to tag and data banks
nicomar0 Oct 12, 2023
fc731df
[hardware]: implemented first version of faulty tag invalidation
nicomar0 Oct 15, 2023
f1ac757
[hardware] Fixed data parity bit width, added configurable split of t…
nicomar0 Oct 16, 2023
414d3b6
[hardware] Moved logic related to reliability mode outside the always…
nicomar0 Oct 16, 2023
d979936
[hardware] Started verification of detection mechanism, there are som…
nicomar0 Oct 17, 2023
a192ef7
[hardware] First version of invalidation handshaking implemented, sti…
nicomar0 Oct 20, 2023
12fd49a
[hardware] Fixed invalidation handshaking
nicomar0 Oct 21, 2023
5723f73
[hardware] Modified handshaking for invalidation stage from data stag…
nicomar0 Oct 23, 2023
1acb39b
[hardware] Modified simple injection mechanism
nicomar0 Oct 23, 2023
00a2762
[hardware] Added fault injection script and clock
nicomar0 Oct 24, 2023
10ba933
[hardware] Fixed error in invalidation stage tag address, added debug…
nicomar0 Oct 29, 2023
29a96d2
[hardware] Changed data caches parity from even to odd
nicomar0 Oct 30, 2023
bd01e07
[hardware] Fixed tag invalidation procedure combinational loop
nicomar0 Oct 31, 2023
1a16e90
[hardware] Changed definition of registers where to inject faults, fi…
nicomar0 Nov 4, 2023
e55a728
[hardware] Added control parameters for fault tolerance in ROC and L1IC
nicomar0 Nov 5, 2023
6b1e4ad
[hardware] Changed default flags
nicomar0 Nov 6, 2023
1505f20
[hardware] Added injection script for mempool, including extracting nets
nicomar0 Nov 8, 2023
119c968
[hardware] Injection scripts working
nicomar0 Nov 9, 2023
05c8e2e
[hardware] Changed number of parity bits in the instruction data banks
nicomar0 Nov 13, 2023
d5e983a
[hardware] Added assertions in lookup
nicomar0 Nov 16, 2023
3456ab0
[hardware] Added fault option in make file
nicomar0 Nov 19, 2023
81c33f2
[snitch] Added waves for debugging
nicomar0 Nov 26, 2023
2ac39b8
[snitch] icache handler support for fault having priority over write …
nicomar0 Nov 26, 2023
0554fa4
[hardware] Modified fault injection clock
nicomar0 Nov 26, 2023
ee71a4c
[hardware] Fixed handler flag
nicomar0 Nov 27, 2023
70eeb2d
[hardware] Changed parity width
nicomar0 Nov 27, 2023
867f6fe
[hardware] Set injection clock
nicomar0 Nov 27, 2023
92aa868
[hardware] Added parity bits to l0 cache
nicomar0 Nov 21, 2023
370211d
[hardware] Added shuffle of registers
nicomar0 Nov 28, 2023
3221d81
[snitch] Added parity checks in the L0 caches
nicomar0 Nov 28, 2023
0c2bbeb
[snitch] Completed parity check for data part in l0
nicomar0 Nov 29, 2023
5aa1caa
[snitch] Added parity checks for the tag in L0 caches
nicomar0 Nov 30, 2023
4bb3cb5
[snitch] Tag invalidation after fault in data banks in l0 caches
nicomar0 Dec 1, 2023
097f42a
[hardware] Included l0 banks in injection scripts
nicomar0 Dec 1, 2023
e4cd023
[hardware] Modified parameters to enable FT in L0 caches
nicomar0 Dec 2, 2023
c9c3b97
[snitch] Fixed missing control parameter
nicomar0 Dec 3, 2023
dc94527
[snitch] Removed comments
nicomar0 Dec 4, 2023
5118bf0
[snitch] Inserted counters for faults, removed comments
nicomar0 Dec 5, 2023
d42a18d
[snitch] Print fault statistics in a file
nicomar0 Dec 5, 2023
217b9f7
[snitch] Fixed eviction vector
nicomar0 Dec 8, 2023
38d1ffd
[snitch] Changed comments
nicomar0 Dec 8, 2023
7d0369c
[Snitch] Added comments in the lookup serial
nicomar0 Jan 3, 2024
b323e58
[snitch] Added readme file for the ROC test bench and updated comments
nicomar0 Jan 7, 2024
79aeb58
[hardware] Added README file about fault injection
nicomar0 Jan 8, 2024
9dcfc02
[hardware] Updated readme
nicomar0 Jan 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions config/config.mk
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,16 @@ xqueue_size ?= 0

# Enable the XpulpIMG extension
xpulpimg ?= 1

# Enable the Reliability mode for RO caches and L1 and L0 icache
rel_rocache ?= 1

rel_l1icache ?= 1

rel_l0icache ?= 0

# Enable fault injection in the instruction caches and define the fault rate (clock periods per fault)

fault_injection ?= 0

fault_rate ?= 100
3 changes: 3 additions & 0 deletions hardware/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ verilator_top ?= mempool_tb_verilator
python ?= python3
# Enable tracing
snitch_trace ?= 0
# Enable faults
icache_faults ?= 0

# Check if the specified QuestaSim version exists
ifeq (, $(shell which $(questa_cmd)))
Expand Down Expand Up @@ -97,6 +99,7 @@ vlog_defs += -DRO_LINE_WIDTH=$(ro_line_width)
vlog_defs += -DDMAS_PER_GROUP=$(dmas_per_group)
vlog_defs += -DAXI_HIER_RADIX=$(axi_hier_radix) -DAXI_MASTERS_PER_GROUP=$(axi_masters_per_group)
vlog_defs += -DSEQ_MEM_SIZE=$(seq_mem_size) -DXQUEUE_SIZE=$(xqueue_size)
vlog_defs += -DREL_ROCACHE=$(rel_rocache) -DREL_L1ICACHE=$(rel_l1icache) -DREL_L0ICACHE=$(rel_l0icache)

# Traffic generation enabled
ifdef tg
Expand Down
6 changes: 6 additions & 0 deletions hardware/deps/snitch/Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,9 @@ sources:
- src/snitch_icache/snitch_icache_refill.sv
- src/snitch_read_only_cache/snitch_axi_to_cache.sv
- src/snitch_read_only_cache/snitch_read_only_cache.sv

- target: rocache_test
files:
#- ../../tb/RO_cache_tb/sourcecode/tb/snitch_read_only_cache_tb.sv
- tb/src/snitch_read_only_cache_tb.sv

7 changes: 7 additions & 0 deletions hardware/deps/snitch/src/snitch_icache/snitch_icache.sv
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ module snitch_icache #(
parameter int FILL_AW = -1,
/// Fill interface data width. Power of two; >= 8.
parameter int FILL_DW = -1,
/// Add parity checks for the L1 caches
parameter bit RELIABILITY_L1 = 0,
/// Add parity checks for the L0 caches
parameter bit RELIABILITY_L0 = 0,
/// Replace the L1 tag banks with latch-based SCM.
parameter bit L1_TAG_SCM = 0,
/// This reduces area impact at the cost of
Expand All @@ -42,6 +46,7 @@ module snitch_icache #(
parameter int L0_EARLY_TAG_WIDTH = -1,
/// Operate L0 cache in slower clock-domain
parameter bit ISO_CROSSING = 1,

parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic
) (
Expand Down Expand Up @@ -79,6 +84,8 @@ module snitch_icache #(
FETCH_DW: FETCH_DW,
FILL_AW: FILL_AW,
FILL_DW: FILL_DW,
RELIABILITY_L1: RELIABILITY_L1,
RELIABILITY_L0: RELIABILITY_L0,
L1_TAG_SCM: L1_TAG_SCM,
EARLY_LATCH: EARLY_LATCH,
BUFFER_LOOKUP: 0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ module snitch_icache_handler #(
.empty_o ( )
);

// Gurarntee ordering
// Guarantee ordering
// Check if there is a miss in flight from this ID. In that case, stall all
// further requests to guarantee correct ordering of requests.
logic [CFG.ID_WIDTH_RESP-1:0] miss_in_flight_d, miss_in_flight_q;
Expand Down Expand Up @@ -214,8 +214,8 @@ module snitch_icache_handler #(
in_req_ready_o = hit_ready;

// The cache lookup was a miss, but there is already a pending
// refill that covers the line.
end else if (pending) begin
// refill that covers the line and the lookup accepted the request.
end else if (pending && !(write_valid_o && !write_ready_i)) begin
push_index = pending_id;
push_enable = 1;

Expand Down
202 changes: 158 additions & 44 deletions hardware/deps/snitch/src/snitch_icache/snitch_icache_l0.sv
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,20 @@ module snitch_icache_l0 import snitch_icache_pkg::*; #(
input logic out_rsp_valid_i,
output logic out_rsp_ready_o
);
// Parity protection of the L0 cache is switched on through the configuration.
localparam bit RELIABILITY_MODE = CFG.RELIABILITY_L0;
// Number of parity bits stored next to every data line (0 when disabled).
localparam int DATA_PARITY_WIDTH = RELIABILITY_MODE ? 8 : 0;
// Number of data bits covered by a single parity bit. The divisor is forced to
// 1 when reliability is disabled so that elaboration never divides by zero;
// LINE_SPLIT is only used inside RELIABILITY_MODE generate blocks.
localparam int LINE_SPLIT = CFG.LINE_WIDTH / (RELIABILITY_MODE ? DATA_PARITY_WIDTH : 1);

typedef logic [CFG.FETCH_AW-1:0] addr_t;
typedef struct packed {
logic [CFG.L0_TAG_WIDTH-1:0] tag;
logic [CFG.L0_TAG_WIDTH+RELIABILITY_MODE-1:0] tag;
logic vld;
} tag_t;

logic [CFG.L0_TAG_WIDTH-1:0] addr_tag, addr_tag_prefetch;

tag_t [CFG.L0_LINE_COUNT-1:0] tag;
logic [CFG.L0_LINE_COUNT-1:0][CFG.LINE_WIDTH-1:0] data;
logic [CFG.L0_LINE_COUNT-1:0][CFG.LINE_WIDTH+DATA_PARITY_WIDTH-1:0] data;

logic [CFG.L0_LINE_COUNT-1:0] hit, hit_early, hit_prefetch;
logic hit_early_is_onehot;
Expand Down Expand Up @@ -83,6 +86,10 @@ module snitch_icache_l0 import snitch_icache_pkg::*; #(
`FF(last_cycle_was_prefetch_q, latch_prefetch, '0)

logic evict_because_miss, evict_because_prefetch;

logic data_parity_error;
logic [CFG.L0_LINE_COUNT-1:0] tag_parity_error_vect;
logic [CFG.L0_LINE_COUNT-1:0] exp_tag_parity;

typedef struct packed {
logic is_prefetch;
Expand All @@ -104,22 +111,53 @@ module snitch_icache_l0 import snitch_icache_pkg::*; #(
// ------------
// Tag Compare
// ------------
for (genvar i = 0; i < CFG.L0_LINE_COUNT; i++) begin : gen_cmp_fetch
assign hit_early[i] = tag[i].vld &
(tag[i].tag[CFG.L0_EARLY_TAG_WIDTH-1:0] == addr_tag[CFG.L0_EARLY_TAG_WIDTH-1:0]);
// The two signals calculate the same.
if (CFG.L0_TAG_WIDTH == CFG.L0_EARLY_TAG_WIDTH) begin : gen_hit_assign
assign hit[i] = hit_early[i];
// Compare the rest of the tag.
end else begin : gen_hit
assign hit[i] = hit_early[i] &
(tag[i].tag[CFG.L0_TAG_WIDTH-1:CFG.L0_EARLY_TAG_WIDTH]
== addr_tag[CFG.L0_TAG_WIDTH-1:CFG.L0_EARLY_TAG_WIDTH]);

// For every line, compute the current parity bit
if (RELIABILITY_MODE) begin
for (genvar i = 0; i < CFG.L0_LINE_COUNT; i++) begin : gen_exp_parity
assign exp_tag_parity[i] = ~^tag[i].tag[CFG.L0_TAG_WIDTH-1:0];
end
assign hit_prefetch[i] = tag[i].vld & (tag[i].tag == addr_tag_prefetch);
end

assign hit_any = |hit;
if (!RELIABILITY_MODE) begin
for (genvar i = 0; i < CFG.L0_LINE_COUNT; i++) begin : gen_cmp_fetch
assign hit_early[i] = tag[i].vld &
(tag[i].tag[CFG.L0_EARLY_TAG_WIDTH-1:0] == addr_tag[CFG.L0_EARLY_TAG_WIDTH-1:0]);
// The two signals calculate the same.
if (CFG.L0_TAG_WIDTH == CFG.L0_EARLY_TAG_WIDTH) begin : gen_hit_assign
assign hit[i] = hit_early[i];
// Compare the rest of the tag.
end else begin : gen_hit
assign hit[i] = hit_early[i] &
(tag[i].tag[CFG.L0_TAG_WIDTH-1:CFG.L0_EARLY_TAG_WIDTH]
== addr_tag[CFG.L0_TAG_WIDTH-1:CFG.L0_EARLY_TAG_WIDTH]);
end
assign hit_prefetch[i] = tag[i].vld & (tag[i].tag[CFG.L0_TAG_WIDTH-1:0] == addr_tag_prefetch);
end
end else begin
for (genvar i = 0; i < CFG.L0_LINE_COUNT; i++) begin : gen_cmp_fetch
// Compute a parity error vector comparing the expected parity bit and the current one.
assign tag_parity_error_vect[i] = (exp_tag_parity[i] != tag[i].tag[CFG.L0_TAG_WIDTH]) && tag[i].vld;
assign hit_early[i] = tag[i].vld &
(tag[i].tag[CFG.L0_EARLY_TAG_WIDTH-1:0] == addr_tag[CFG.L0_EARLY_TAG_WIDTH-1:0]);
// The two signals calculate the same.
if (CFG.L0_TAG_WIDTH == CFG.L0_EARLY_TAG_WIDTH) begin : gen_hit_assign
assign hit[i] = hit_early[i] & !tag_parity_error_vect[i];
// Compare the rest of the tag and corresponding parity error vector.
end else begin : gen_hit
assign hit[i] = hit_early[i] &
(tag[i].tag[CFG.L0_TAG_WIDTH-1:CFG.L0_EARLY_TAG_WIDTH]
== addr_tag[CFG.L0_TAG_WIDTH-1:CFG.L0_EARLY_TAG_WIDTH]) & !tag_parity_error_vect[i];
end
assign hit_prefetch[i] = tag[i].vld & (tag[i].tag[CFG.L0_TAG_WIDTH-1:0] == addr_tag_prefetch);
end
end
if(RELIABILITY_MODE) begin
// A parity error in the data overwrites the hit into a miss
assign hit_any = |hit && !data_parity_error;
end else begin
assign hit_any = |hit;
end
assign hit_prefetch_any = |hit_prefetch;
assign miss = ~hit_any & in_valid_i & ~pending_refill_q;

Expand All @@ -129,49 +167,110 @@ module snitch_icache_l0 import snitch_icache_pkg::*; #(
.clk_o (clk_inv)
);

// Parity bits computed on the incoming refill line (out_rsp_data_i).
// Only driven when RELIABILITY_MODE is set; DATA_PARITY_WIDTH is 0 otherwise.
logic [DATA_PARITY_WIDTH-1:0] data_parity;
if (RELIABILITY_MODE) begin
// The line is split into DATA_PARITY_WIDTH slices of LINE_SPLIT bits each,
// counted from the most significant bit downwards: slice j has its MSB at
// CFG.LINE_WIDTH - LINE_SPLIT*j - 1.
// Parity bit j is the reduction XNOR (~^) of slice j, i.e. it is 1 when the
// slice holds an even number of ones.
// NOTE(review): if CFG.LINE_WIDTH is not a multiple of DATA_PARITY_WIDTH the
// lowest bits of the line are not covered by any slice -- confirm.
for (genvar j = 0; j < DATA_PARITY_WIDTH; j++) begin
assign data_parity[j] = ~^out_rsp_data_i[CFG.LINE_WIDTH - LINE_SPLIT*j -1 -: LINE_SPLIT];
end
end
for (genvar i = 0; i < CFG.L0_LINE_COUNT; i++) begin : gen_array
// Tag Array
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
tag[i].vld <= 0;
tag[i].tag <= 0;
end else begin
if (evict_strb[i]) begin
tag[i].vld <= 1'b0;
tag[i].tag <= evict_because_prefetch ? addr_tag_prefetch : addr_tag;
end else if (validate_strb[i]) begin
tag[i].vld <= 1'b1;
if(!RELIABILITY_MODE) begin
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
tag[i].vld <= 0;
tag[i].tag <= 0;
end else begin
if (evict_strb[i]) begin
tag[i].vld <= 1'b0;
tag[i].tag <= evict_because_prefetch ? addr_tag_prefetch : addr_tag;
end else if (validate_strb[i]) begin
tag[i].vld <= 1'b1;
end
if (flush_strb[i]) begin
tag[i].vld <= 1'b0;
end
end
if (flush_strb[i]) begin
tag[i].vld <= 1'b0;
end
if (CFG.EARLY_LATCH) begin : gen_latch
logic clk_vld;
tc_clk_gating i_clk_gate (
.clk_i (clk_inv ),
.en_i (validate_strb[i]),
.test_en_i (1'b0 ),
.clk_o (clk_vld )
);
// Data Array
/* verilator lint_off NOLATCH */
always_latch begin
if (clk_vld) begin
data[i] <= out_rsp_data_i;
end
end
/* verilator lint_on NOLATCH */
end else begin : gen_ff
`FFLNR(data[i], out_rsp_data_i, validate_strb[i], clk_i)
end
end
if (CFG.EARLY_LATCH) begin : gen_latch
logic clk_vld;
tc_clk_gating i_clk_gate (
.clk_i (clk_inv ),
.en_i (validate_strb[i]),
.test_en_i (1'b0 ),
.clk_o (clk_vld )
);
// Data Array
/* verilator lint_off NOLATCH */
always_latch begin
if (clk_vld) begin
data[i] <= out_rsp_data_i;
end else begin
// Compute parity bit
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
tag[i].vld <= 0;
tag[i].tag <= 0;
end else begin
if (evict_strb[i]) begin
tag[i].vld <= 1'b0;
tag[i].tag <= evict_because_prefetch ? {~^addr_tag_prefetch, addr_tag_prefetch} : {~^addr_tag, addr_tag};
end else if (validate_strb[i]) begin
tag[i].vld <= 1'b1;
end
if (flush_strb[i]) begin
tag[i].vld <= 1'b0;
end
end
end
if (CFG.EARLY_LATCH) begin : gen_latch
logic clk_vld;
tc_clk_gating i_clk_gate (
.clk_i (clk_inv ),
.en_i (validate_strb[i]),
.test_en_i (1'b0 ),
.clk_o (clk_vld )
);
// Data Array
// Store both the parity and the data
/* verilator lint_off NOLATCH */
always_latch begin
if (clk_vld) begin
data[i] <= {data_parity, out_rsp_data_i};
end
end
/* verilator lint_on NOLATCH */
end else begin : gen_ff
`FFLNR(data[i], {data_parity, out_rsp_data_i}, validate_strb[i], clk_i)
end
/* verilator lint_on NOLATCH */
end else begin : gen_ff
`FFLNR(data[i], out_rsp_data_i, validate_strb[i], clk_i)
end
end

// ----
// HIT
// ----
// we hit in the cache and there was a unique hit.
logic [CFG.L0_LINE_COUNT-1:0] data_parity_error_vect;

if (RELIABILITY_MODE) begin
  // Parity recomputed from the data currently stored in every line, one bit
  // per LINE_SPLIT-wide slice (same slicing as used when the line is written).
  logic [CFG.L0_LINE_COUNT-1:0][DATA_PARITY_WIDTH-1:0] exp_data_parity;
  for (genvar i = 0; i < CFG.L0_LINE_COUNT; i++) begin
    for (genvar j = 0; j < DATA_PARITY_WIDTH; j++) begin
      assign exp_data_parity[i][j] = ~^data[i][CFG.LINE_WIDTH - LINE_SPLIT*j -1 -: LINE_SPLIT];
    end
    // A valid line is faulty when the recomputed parity differs from the
    // parity bits stored above the data (bits [LINE_WIDTH +: DATA_PARITY_WIDTH]).
    assign data_parity_error_vect[i] = (exp_data_parity[i] != data[i][CFG.LINE_WIDTH+:DATA_PARITY_WIDTH]) && tag[i].vld;
  end
  // Check whether the currently selected line has an error.
  // data_parity_error_vect is indexed by cache line, so the line must be
  // selected with the hit vector. Shifting by the fetch-word offset of the
  // address would inspect an unrelated line and let faulty data through.
  assign data_parity_error = |(data_parity_error_vect & hit);
end else begin
  // Without reliability there is nothing to check: tie the error signals off
  // so that their consumers never see undriven (X) values.
  assign data_parity_error_vect = '0;
  assign data_parity_error = 1'b0;
end
assign in_ready_o = hit_any & hit_early_is_onehot;

logic [CFG.LINE_WIDTH-1:0] ins_data;
Expand Down Expand Up @@ -225,9 +324,24 @@ module snitch_icache_l0 import snitch_icache_pkg::*; #(
// but didn't hit in the final comparison.
flush_strb = ~hit & hit_early;
end
if (RELIABILITY_MODE && tag_parity_error_vect!='0) begin
// Evict all tags that have a fault
flush_strb = flush_strb | tag_parity_error_vect;
end
if (RELIABILITY_MODE && data_parity_error_vect!='0) begin
// Evict entry that hit but has faults on the data
flush_strb = flush_strb | data_parity_error_vect;
end
if (flush_valid_i) flush_strb = '1;
end

// Send a warning that an error was detected
always @ (posedge clk_i) begin
if (RELIABILITY_MODE && tag_parity_error_vect != '0 && data_parity_error_vect != '0) $display("%t [l0cache]: tag and data fault: flushing tags: %b",$time, flush_strb);
else if (RELIABILITY_MODE && tag_parity_error_vect != '0) $display("%t [l0cache]: tag fault: flushing tags: %b",$time, flush_strb);
else if (RELIABILITY_MODE && data_parity_error_vect != '0) $display("%t [l0cache]: data fault: flushing tags: %b",$time, flush_strb);
end

`FF(cnt_q, cnt_d, '0)

// -------------
Expand Down
Loading