Skip to content

Commit

Permalink
Add self-invalidation coherence
Browse files Browse the repository at this point in the history
added files required for compilation

Added target to test litmus tests

Per byte dirty bit added to std dcache and tested

basic support for dual core instantiation

Some automation added to the multi-core testing process

minor changes

temporary ci modifications for working without sudo permissions

branch prova

prova modified

Multi core instantiation made generic

Transition between WAIT_CRITICAL_WORD and WAIT_TAG removed if there is a flush - feature tested

Changed repo with master branch and added masks for reservation at cacheline granularity because burst not supported

Fix the never return problem for non boot cores and dt modified for 2 cores

Added master branch of common_cells and compilation of new file in Makefile

Added transition between FLUSHING and FLUSHING to avoid multiple flushes during atomics

Unused code removed and code commented

Added support to use the master branch of the axi_riscv_atomics repository

Added support for multiple ariane instances for fpga synthesis

Increased stack for big applications and reduced number of harts

Peripherals configured to use multiple cores

List of unsolved issues encountered during the master thesis

Co-authored-by: msc22h2 <[email protected]>
Signed-off-by: Nils Wistoff <[email protected]>
  • Loading branch information
niwis and Michelangelo98 committed Feb 16, 2024
1 parent c611618 commit ffa3202
Show file tree
Hide file tree
Showing 9 changed files with 284 additions and 31 deletions.
16 changes: 8 additions & 8 deletions core/cache_subsystem/cache_ctrl.sv
Original file line number Diff line number Diff line change
Expand Up @@ -298,18 +298,18 @@ module cache_ctrl
// two memory look-ups on a single-ported SRAM and therefore is non-atomic
if (!mshr_index_matches_i) begin
// store data, write dirty bit
req_o = hit_way_q;
addr_o = mem_req_q.index;
we_o = 1'b1;
req_o = hit_way_q;
addr_o = mem_req_q.index;
we_o = 1'b1;

be_o.vldrty = hit_way_q;
be_o.vldrty = hit_way_q;

// set the correct byte enable
be_o.data[cl_offset>>3+:8] = mem_req_q.be;
data_o.data[cl_offset+:64] = mem_req_q.wdata;
be_o.data[cl_offset>>3+:8] = mem_req_q.be;
data_o.data[cl_offset+:64] = mem_req_q.wdata;
// ~> change the state
data_o.dirty = 1'b1;
data_o.valid = 1'b1;
data_o.dirty[cl_offset>>3+:8] = mem_req_q.be;
data_o.valid = 1'b1;

// got a grant ~> this is finished now
if (gnt_i) begin
Expand Down
11 changes: 6 additions & 5 deletions core/cache_subsystem/miss_handler.sv
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ module miss_handler
automatic logic [DCACHE_SET_ASSOC-1:0] evict_way, valid_way;

for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) begin
evict_way[i] = data_i[i].valid & data_i[i].dirty;
evict_way[i] = data_i[i].valid & (|data_i[i].dirty);
valid_way[i] = data_i[i].valid;
end
// ----------------------
Expand Down Expand Up @@ -258,10 +258,11 @@ module miss_handler
lfsr_enable = 1'b1;
evict_way_d = lfsr_oh;
// do we need to write back the cache line?
if (data_i[lfsr_bin].dirty) begin
if (|data_i[lfsr_bin].dirty) begin
state_d = WB_CACHELINE_MISS;
evict_cl_d.tag = data_i[lfsr_bin].tag;
evict_cl_d.data = data_i[lfsr_bin].data;
evict_cl_d.dirty = data_i[lfsr_bin].dirty;
cnt_d = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
// no - we can request a cache line now
end else state_d = REQ_CACHELINE;
Expand Down Expand Up @@ -300,7 +301,7 @@ module miss_handler
data_o.tag = mshr_q.addr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
data_o.data = data_miss_fsm;
data_o.valid = 1'b1;
data_o.dirty = 1'b0;
data_o.dirty = '0;

// is this a write?
if (mshr_q.we) begin
Expand All @@ -310,7 +311,7 @@ module miss_handler
if (mshr_q.be[i]) data_o.data[(cl_offset+i*8)+:8] = mshr_q.wdata[i];
end
// its immediately dirty if we write
data_o.dirty = 1'b1;
data_o.dirty[cl_offset>>3+:8] = mshr_q.be;
end
// reset MSHR
mshr_d.valid = 1'b0;
Expand All @@ -331,7 +332,7 @@ module miss_handler
cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET],
{{DCACHE_BYTE_OFFSET} {1'b0}}
};
req_fsm_miss_be = '1;
req_fsm_miss_be = evict_cl_q.dirty;
req_fsm_miss_we = 1'b1;
req_fsm_miss_wdata = evict_cl_q.data;

Expand Down
26 changes: 18 additions & 8 deletions core/cache_subsystem/std_nbdcache.sv
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ module std_nbdcache
cache_line_t wdata_ram;
cache_line_t [ DCACHE_SET_ASSOC-1:0] rdata_ram;
cl_be_t be_ram;
vldrty_t [ DCACHE_SET_ASSOC-1:0] be_valid_dirty_ram;

// Busy signals
logic miss_handler_busy;
Expand Down Expand Up @@ -223,19 +224,28 @@ module std_nbdcache

// align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals.
// note: if you have an SRAM that supports flat bit enables for your target technology,
// you can use it here to save the extra 4x overhead introduced by this workaround.
logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;
// you can use it here to save the extra 17x overhead introduced by this workaround.
logic [(DCACHE_LINE_WIDTH+8)*DCACHE_SET_ASSOC-1:0] dirty_wdata, dirty_rdata;

for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
assign dirty_wdata[8*i] = wdata_ram.dirty;
assign dirty_wdata[8*i+1] = wdata_ram.valid;
assign rdata_ram[i].dirty = dirty_rdata[8*i];
assign rdata_ram[i].valid = dirty_rdata[8*i+1];
for (genvar j = 0; j < DCACHE_LINE_WIDTH / 8; j++) begin
// dirty bits assignment
assign dirty_wdata[(DCACHE_LINE_WIDTH+8)*i+8*j] = wdata_ram.dirty[j];
assign rdata_ram[i].dirty[j] = dirty_rdata[(DCACHE_LINE_WIDTH+8)*i+8*j];
end
// valid bit assignment
assign dirty_wdata[DCACHE_LINE_WIDTH+(DCACHE_LINE_WIDTH+8)*i] = wdata_ram.valid;
assign rdata_ram[i].valid = dirty_rdata[DCACHE_LINE_WIDTH+(DCACHE_LINE_WIDTH+8)*i];
end

// be construction for valid_dirty_sram
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
assign be_valid_dirty_ram[i*(DCACHE_LINE_WIDTH/8+1)+:(DCACHE_LINE_WIDTH/8+1)] = {be_ram.vldrty[i], be_ram.data} & {(DCACHE_LINE_WIDTH/8+1){be_ram.vldrty[i]}};
end

sram #(
.USER_WIDTH(1),
.DATA_WIDTH(4 * DCACHE_DIRTY_WIDTH),
.DATA_WIDTH((DCACHE_LINE_WIDTH + 8) * DCACHE_SET_ASSOC),
.NUM_WORDS (DCACHE_NUM_WORDS)
) valid_dirty_sram (
.clk_i (clk_i),
Expand All @@ -245,7 +255,7 @@ module std_nbdcache
.addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
.wuser_i('0),
.wdata_i(dirty_wdata),
.be_i (be_ram.vldrty),
.be_i (be_valid_dirty_ram),
.ruser_o(),
.rdata_o(dirty_rdata)
);
Expand Down
8 changes: 4 additions & 4 deletions core/include/std_cache_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ package std_cache_pkg;
} bypass_rsp_t;

typedef struct packed {
logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag; // tag array
logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // data array
logic valid; // state array
logic dirty; // state array
logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag; // tag array
logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // data array
logic valid; // state array
logic [(ariane_pkg::DCACHE_LINE_WIDTH+7)/8-1:0] dirty; // state array
} cache_line_t;

// cache line byte enable
Expand Down
2 changes: 1 addition & 1 deletion corev_apu/tb/common/tb_dcache_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ package tb_pkg;
parameter ERROR_CNT_STOP_LEVEL = 1; // use 1 for debugging. 0 runs the complete simulation...

// tb_readport sequences
typedef enum logic [2:0] { RANDOM_SEQ, LINEAR_SEQ, BURST_SEQ, IDLE_SEQ, WRAP_SEQ, SET_SEQ, CONST_SEQ } seq_t;
typedef enum logic [2:0] { RANDOM_SEQ, LINEAR_SEQ, BURST_SEQ, IDLE_SEQ, WRAP_SEQ, SET_SEQ, CONST_SEQ, HALF_SEQ } seq_t;

typedef enum logic [1:0] { OTHER, BYPASS, CACHED } port_type_t;

Expand Down
16 changes: 12 additions & 4 deletions corev_apu/tb/common/tb_writeport.sv
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ program tb_writeport import tb_pkg::*; import ariane_pkg::*; #(
input logic rst_ni,

// to testbench master
input logic half_i,
input logic [1:0] max_size_i,
ref string test_name_i,
input logic [6:0] req_rate_i,
input seq_t seq_type_i,
Expand Down Expand Up @@ -63,13 +65,13 @@ program tb_writeport import tb_pkg::*; import ariane_pkg::*; #(
automatic logic [7:0] be;
automatic logic [1:0] size;

void'(randomize(size));
void'(randomize(size) with {size >= 2'b00; size <= max_size_i;});
// align to size, set correct byte enables
be = '0;
unique case(size)
2'b00: be[paddr[2:0] +: 1] = '1;
2'b01: be[paddr[2:1]<<1 +: 2] = '1;
2'b10: be[paddr[2:2]<<2 +: 4] = '1;
2'b00: be[int'(paddr[2:0]) +: 1] = '1;
2'b01: be[int'(paddr[2:1]<<1) +: 2] = '1;
2'b10: be[int'(paddr[2:2]<<2) +: 4] = '1;
2'b11: be = '1;
default: ;
endcase
Expand Down Expand Up @@ -109,6 +111,7 @@ program tb_writeport import tb_pkg::*; import ariane_pkg::*; #(
dut_req_port_o.data_req = 1'b1;
// generate random address
void'(randomize(paddr) with {paddr >= 0; paddr < (MemWords<<3);});
if (seq_type_i == HALF_SEQ) paddr[int'(max_size_i)] = half_i;
applyRandData();
`APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt)
end
Expand Down Expand Up @@ -278,6 +281,11 @@ program tb_writeport import tb_pkg::*; import ariane_pkg::*; #(
$display("%s> start random sequence with %04d vectors and req_rate %03d", PortName, seq_num_vect_i, req_rate_i);
genRandReq();
end
HALF_SEQ: begin
$display("%s> start half random sequence with %04d vectors and req_rate %03d", PortName, seq_num_vect_i, req_rate_i);
$display("%s> half = %b and max size = %b", PortName, half_i, max_size_i);
genRandReq();
end
LINEAR_SEQ: begin
$display("%s> start linear sequence with %04d vectors and req_rate %03d", PortName, seq_num_vect_i, req_rate_i);
genSeqWrite();
Expand Down
2 changes: 1 addition & 1 deletion corev_apu/tb/tb_wb_dcache/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ src := $(shell xargs printf '\n%s' < $(src-list) | cut -b 1-)
compile_flag += +cover+i_dut -incr -64 -nologo -svinputport=compat -override_timescale 1ns/1ps -suppress 2583 -suppress 13262 +cover
sim_opts += -64 -coverage -classdebug -voptargs="+acc"
questa_version ?= ${QUESTASIM_VERSION}
incdir += ../common/ ../../axi/include/
incdir += ../common/ ../../axi/include/ ../../../common/submodules/common_cells/include/

# Iterate over all include directories and write them with +incdir+ prefixed
# +incdir+ works for Verilator and QuestaSim
Expand Down
Loading

0 comments on commit ffa3202

Please sign in to comment.