diff --git a/core/cache_subsystem/cache_ctrl.sv b/core/cache_subsystem/cache_ctrl.sv index 2cd60c1f5a3..9c7cf28ce82 100644 --- a/core/cache_subsystem/cache_ctrl.sv +++ b/core/cache_subsystem/cache_ctrl.sv @@ -298,18 +298,18 @@ module cache_ctrl // two memory look-ups on a single-ported SRAM and therefore is non-atomic if (!mshr_index_matches_i) begin // store data, write dirty bit - req_o = hit_way_q; - addr_o = mem_req_q.index; - we_o = 1'b1; + req_o = hit_way_q; + addr_o = mem_req_q.index; + we_o = 1'b1; - be_o.vldrty = hit_way_q; + be_o.vldrty = hit_way_q; // set the correct byte enable - be_o.data[cl_offset>>3+:8] = mem_req_q.be; - data_o.data[cl_offset+:64] = mem_req_q.wdata; + be_o.data[cl_offset>>3+:8] = mem_req_q.be; + data_o.data[cl_offset+:64] = mem_req_q.wdata; // ~> change the state - data_o.dirty = 1'b1; - data_o.valid = 1'b1; + data_o.dirty[cl_offset>>3+:8] = mem_req_q.be; + data_o.valid = 1'b1; // got a grant ~> this is finished now if (gnt_i) begin diff --git a/core/cache_subsystem/miss_handler.sv b/core/cache_subsystem/miss_handler.sv index 5cce9eb6e16..deb7dbdb230 100644 --- a/core/cache_subsystem/miss_handler.sv +++ b/core/cache_subsystem/miss_handler.sv @@ -151,7 +151,7 @@ module miss_handler automatic logic [DCACHE_SET_ASSOC-1:0] evict_way, valid_way; for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) begin - evict_way[i] = data_i[i].valid & data_i[i].dirty; + evict_way[i] = data_i[i].valid & (|data_i[i].dirty); valid_way[i] = data_i[i].valid; end // ---------------------- @@ -258,10 +258,11 @@ module miss_handler lfsr_enable = 1'b1; evict_way_d = lfsr_oh; // do we need to write back the cache line? - if (data_i[lfsr_bin].dirty) begin + if (|data_i[lfsr_bin].dirty) begin state_d = WB_CACHELINE_MISS; evict_cl_d.tag = data_i[lfsr_bin].tag; evict_cl_d.data = data_i[lfsr_bin].data; + evict_cl_d.dirty = data_i[lfsr_bin].dirty; cnt_d = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0]; // no - we can request a cache line now end else state_d = REQ_CACHELINE; @@ -300,7 +301,7 @@ module miss_handler data_o.tag = mshr_q.addr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH]; data_o.data = data_miss_fsm; data_o.valid = 1'b1; - data_o.dirty = 1'b0; + data_o.dirty = '0; // is this a write? if (mshr_q.we) begin @@ -310,7 +311,7 @@ module miss_handler if (mshr_q.be[i]) data_o.data[(cl_offset+i*8)+:8] = mshr_q.wdata[i]; end // its immediately dirty if we write - data_o.dirty = 1'b1; + data_o.dirty[cl_offset>>3+:8] = mshr_q.be; end // reset MSHR mshr_d.valid = 1'b0; @@ -331,7 +332,7 @@ module miss_handler cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET], {{DCACHE_BYTE_OFFSET} {1'b0}} }; - req_fsm_miss_be = '1; + req_fsm_miss_be = evict_cl_q.dirty; req_fsm_miss_we = 1'b1; req_fsm_miss_wdata = evict_cl_q.data; diff --git a/core/cache_subsystem/std_nbdcache.sv b/core/cache_subsystem/std_nbdcache.sv index 4cdee7e4ce8..0cc9647c3f3 100644 --- a/core/cache_subsystem/std_nbdcache.sv +++ b/core/cache_subsystem/std_nbdcache.sv @@ -91,6 +91,7 @@ module std_nbdcache cache_line_t wdata_ram; cache_line_t [ DCACHE_SET_ASSOC-1:0] rdata_ram; cl_be_t be_ram; + vldrty_t [ DCACHE_SET_ASSOC-1:0] be_valid_dirty_ram; // Busy signals logic miss_handler_busy; @@ -223,19 +224,28 @@ module std_nbdcache // align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals. // note: if you have an SRAM that supports flat bit enables for your target technology, - // you can use it here to save the extra 4x overhead introduced by this workaround. - logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata; + // you can use it here to save the extra 17x overhead introduced by this workaround. + logic [(DCACHE_LINE_WIDTH+8)*DCACHE_SET_ASSOC-1:0] dirty_wdata, dirty_rdata; for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin - assign dirty_wdata[8*i] = wdata_ram.dirty; - assign dirty_wdata[8*i+1] = wdata_ram.valid; - assign rdata_ram[i].dirty = dirty_rdata[8*i]; - assign rdata_ram[i].valid = dirty_rdata[8*i+1]; + for (genvar j = 0; j < DCACHE_LINE_WIDTH / 8; j++) begin + // dirty bits assignment + assign dirty_wdata[(DCACHE_LINE_WIDTH+8)*i+8*j] = wdata_ram.dirty[j]; + assign rdata_ram[i].dirty[j] = dirty_rdata[(DCACHE_LINE_WIDTH+8)*i+8*j]; + end + // valid bit assignment + assign dirty_wdata[DCACHE_LINE_WIDTH+(DCACHE_LINE_WIDTH+8)*i] = wdata_ram.valid; + assign rdata_ram[i].valid = dirty_rdata[DCACHE_LINE_WIDTH+(DCACHE_LINE_WIDTH+8)*i]; + end + + // be construction for valid_dirty_sram + for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin + assign be_valid_dirty_ram[i*(DCACHE_LINE_WIDTH/8+1)+:(DCACHE_LINE_WIDTH/8+1)] = {be_ram.vldrty[i], be_ram.data} & {(DCACHE_LINE_WIDTH/8+1){be_ram.vldrty[i]}}; end sram #( .USER_WIDTH(1), - .DATA_WIDTH(4 * DCACHE_DIRTY_WIDTH), + .DATA_WIDTH((DCACHE_LINE_WIDTH + 8) * DCACHE_SET_ASSOC), .NUM_WORDS (DCACHE_NUM_WORDS) ) valid_dirty_sram ( .clk_i (clk_i), @@ -245,7 +255,7 @@ module std_nbdcache .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]), .wuser_i('0), .wdata_i(dirty_wdata), - .be_i (be_ram.vldrty), + .be_i (be_valid_dirty_ram), .ruser_o(), .rdata_o(dirty_rdata) ); diff --git a/core/include/std_cache_pkg.sv b/core/include/std_cache_pkg.sv index ae812c99740..cdd11a6e451 100644 --- a/core/include/std_cache_pkg.sv +++ b/core/include/std_cache_pkg.sv @@ -62,10 +62,10 @@ package std_cache_pkg; } bypass_rsp_t; typedef struct packed { - logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag; // tag array - logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // data array - logic valid; // state array - logic dirty; // state array + logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag; // tag array + logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // data array + logic valid; // state array + logic [(ariane_pkg::DCACHE_LINE_WIDTH+7)/8-1:0] dirty; // state array } cache_line_t; // cache line byte enable diff --git a/corev_apu/tb/common/tb_dcache_pkg.sv b/corev_apu/tb/common/tb_dcache_pkg.sv index 7584d81a868..4aa4ce6c59f 100644 --- a/corev_apu/tb/common/tb_dcache_pkg.sv +++ b/corev_apu/tb/common/tb_dcache_pkg.sv @@ -36,7 +36,7 @@ package tb_pkg; parameter ERROR_CNT_STOP_LEVEL = 1; // use 1 for debugging. 0 runs the complete simulation... // tb_readport sequences - typedef enum logic [2:0] { RANDOM_SEQ, LINEAR_SEQ, BURST_SEQ, IDLE_SEQ, WRAP_SEQ, SET_SEQ, CONST_SEQ } seq_t; + typedef enum logic [2:0] { RANDOM_SEQ, LINEAR_SEQ, BURST_SEQ, IDLE_SEQ, WRAP_SEQ, SET_SEQ, CONST_SEQ, HALF_SEQ } seq_t; typedef enum logic [1:0] { OTHER, BYPASS, CACHED } port_type_t; diff --git a/corev_apu/tb/common/tb_writeport.sv b/corev_apu/tb/common/tb_writeport.sv index babadd91c14..fcc48bca8ee 100644 --- a/corev_apu/tb/common/tb_writeport.sv +++ b/corev_apu/tb/common/tb_writeport.sv @@ -30,6 +30,8 @@ program tb_writeport import tb_pkg::*; import ariane_pkg::*; #( input logic rst_ni, // to testbench master + input logic half_i, + input logic [1:0] max_size_i, ref string test_name_i, input logic [6:0] req_rate_i, input seq_t seq_type_i, @@ -63,13 +65,13 @@ program tb_writeport import tb_pkg::*; import ariane_pkg::*; #( automatic logic [7:0] be; automatic logic [1:0] size; - void'(randomize(size)); + void'(randomize(size) with {size >= 2'b00; size <= max_size_i;}); // align to size, set correct byte enables be = '0; unique case(size) - 2'b00: be[paddr[2:0] +: 1] = '1; - 2'b01: be[paddr[2:1]<<1 +: 2] = '1; - 2'b10: be[paddr[2:2]<<2 +: 4] = '1; + 2'b00: be[int'(paddr[2:0]) +: 1] = '1; + 2'b01: be[int'(paddr[2:1]<<1) +: 2] = '1; + 2'b10: be[int'(paddr[2:2]<<2) +: 4] = '1; 2'b11: be = '1; default: ; endcase @@ -109,6 +111,7 @@ program tb_writeport import tb_pkg::*; import ariane_pkg::*; #( dut_req_port_o.data_req = 1'b1; // generate random address void'(randomize(paddr) with {paddr >= 0; paddr < (MemWords<<3);}); + if (seq_type_i == HALF_SEQ) paddr[int'(max_size_i)] = half_i; applyRandData(); `APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt) end @@ -278,6 +281,11 @@ program tb_writeport import tb_pkg::*; import ariane_pkg::*; #( $display("%s> start random sequence with %04d vectors and req_rate %03d", PortName, seq_num_vect_i, req_rate_i); genRandReq(); end + HALF_SEQ: begin + $display("%s> start half random sequence with %04d vectors and req_rate %03d", PortName, seq_num_vect_i, req_rate_i); + $display("%s> half = %b and max size = %b", PortName, half_i, max_size_i); + genRandReq(); + end LINEAR_SEQ: begin $display("%s> start linear sequence with %04d vectors and req_rate %03d", PortName, seq_num_vect_i, req_rate_i); genSeqWrite(); diff --git a/corev_apu/tb/tb_wb_dcache/Makefile b/corev_apu/tb/tb_wb_dcache/Makefile index 5877d27e61c..affee866b3c 100755 --- a/corev_apu/tb/tb_wb_dcache/Makefile +++ b/corev_apu/tb/tb_wb_dcache/Makefile @@ -20,7 +20,7 @@ src := $(shell xargs printf '\n%s' < $(src-list) | cut -b 1-) compile_flag += +cover+i_dut -incr -64 -nologo -svinputport=compat -override_timescale 1ns/1ps -suppress 2583 -suppress 13262 +cover sim_opts += -64 -coverage -classdebug -voptargs="+acc" questa_version ?= ${QUESTASIM_VERSION} -incdir += ../common/ ../../axi/include/ +incdir += ../common/ ../../axi/include/ ../../../common/submodules/common_cells/include/ # Iterate over all include directories and write them with +incdir+ prefixed # +incdir+ works for Verilator and QuestaSim diff --git a/corev_apu/tb/tb_wb_dcache/hdl/tb.sv b/corev_apu/tb/tb_wb_dcache/hdl/tb.sv index 0b4550fec57..c09c6f9dd7b 100644 --- a/corev_apu/tb/tb_wb_dcache/hdl/tb.sv +++ b/corev_apu/tb/tb_wb_dcache/hdl/tb.sv @@ -94,6 +94,8 @@ module tb import ariane_pkg::*; import std_cache_pkg::*; import tb_pkg::*; #()() seq_t [2:0] seq_type; logic [3:0] seq_done; logic [6:0] req_rate[2:0]; + logic half; + logic [1:0] max_size; logic seq_run, seq_last; logic end_of_sim; @@ -234,6 +236,37 @@ module tb import ariane_pkg::*; import std_cache_pkg::*; import tb_pkg::*; #()() `APPL_WAIT_CYC(clk_i, 1) endtask : flushCache + //integer fd = $fopen("extern_write.txt","w"); + // Write directly the tb memory + function automatic void external_writer(int unsigned pos, int unsigned half); + automatic logic[7:0] val; + for (int k=0; k requests to cached region should use cache port, + // those to uncached regions should use bypass port + bypass_mem_port.set_region(0, CachedAddrBeg - 1); + data_mem_port.set_region(CachedAddrBeg, MemBytes - 1); + + runSeq(0,nWriteVectors,0); + external_writer(int'(max_size),int'(!half)); + flushCache(); + tb_mem_port_t::check_mem(); + + ////////////////////////////////////////////// + test_name = "TEST 20 -- random write on half memory(MSB) and external writer on the other half -- max size = 64b -- enabled cache + tlb, mem contentions + invalidations"; + + // Config + enable_i = 1; + tlb_rand_en = 1; + mem_rand_en = 1; + inv_rand_en = 1; + max_size = 2'b11; + half = 1; + seq_type = '{HALF_SEQ, RANDOM_SEQ, RANDOM_SEQ}; + req_rate = '{default:50}; + + // cache enabled ~> requests to cached region should use cache port, + // those to uncached regions should use bypass port + bypass_mem_port.set_region(0, CachedAddrBeg - 1); + data_mem_port.set_region(CachedAddrBeg, MemBytes - 1); + + runSeq(0,nWriteVectors,0); + external_writer(int'(max_size),int'(!half)); + flushCache(); + tb_mem_port_t::check_mem(); + + ////////////////////////////////////////////// + test_name = "TEST 21 -- random write on half memory(LSB) and external writer on the other half -- max size = 32b -- enabled cache + tlb, mem contentions + invalidations"; + + // Config + enable_i = 1; + tlb_rand_en = 1; + mem_rand_en = 1; + inv_rand_en = 1; + max_size = 2'b10; + half = 0; + seq_type = '{HALF_SEQ, RANDOM_SEQ, RANDOM_SEQ}; + req_rate = '{default:50}; + + // cache enabled ~> requests to cached region should use cache port, + // those to uncached regions should use bypass port + bypass_mem_port.set_region(0, CachedAddrBeg - 1); + data_mem_port.set_region(CachedAddrBeg, MemBytes - 1); + + runSeq(0,nWriteVectors,0); + external_writer(int'(max_size),int'(!half)); + flushCache(); + tb_mem_port_t::check_mem(); + + ////////////////////////////////////////////// + test_name = "TEST 22 -- random write on half memory(MSB) and external writer on the other half -- max size = 32b -- enabled cache + tlb, mem contentions + invalidations"; + + // Config + enable_i = 1; + tlb_rand_en = 1; + mem_rand_en = 1; + inv_rand_en = 1; + max_size = 2'b10; + half = 1; + seq_type = '{HALF_SEQ, RANDOM_SEQ, RANDOM_SEQ}; + req_rate = '{default:50}; + + // cache enabled ~> requests to cached region should use cache port, + // those to uncached regions should use bypass port + bypass_mem_port.set_region(0, CachedAddrBeg - 1); + data_mem_port.set_region(CachedAddrBeg, MemBytes - 1); + + runSeq(0,nWriteVectors,0); + external_writer(int'(max_size),int'(!half)); + flushCache(); + tb_mem_port_t::check_mem(); + + ////////////////////////////////////////////// + test_name = "TEST 23 -- random write on half memory(LSB) and external writer on the other half -- max size = 16b -- enabled cache + tlb, mem contentions + invalidations"; + + // Config + enable_i = 1; + tlb_rand_en = 1; + mem_rand_en = 1; + inv_rand_en = 1; + max_size = 2'b01; + half = 0; + seq_type = '{HALF_SEQ, RANDOM_SEQ, RANDOM_SEQ}; + req_rate = '{default:50}; + + // cache enabled ~> requests to cached region should use cache port, + // those to uncached regions should use bypass port + bypass_mem_port.set_region(0, CachedAddrBeg - 1); + data_mem_port.set_region(CachedAddrBeg, MemBytes - 1); + + runSeq(0,nWriteVectors,0); + external_writer(int'(max_size),int'(!half)); + flushCache(); + tb_mem_port_t::check_mem(); + + ////////////////////////////////////////////// + test_name = "TEST 24 -- random write on half memory(MSB) and external writer on the other half -- max size = 16b -- enabled cache + tlb, mem contentions + invalidations"; + + // Config + enable_i = 1; + tlb_rand_en = 1; + mem_rand_en = 1; + inv_rand_en = 1; + max_size = 2'b01; + half = 1; + seq_type = '{HALF_SEQ, RANDOM_SEQ, RANDOM_SEQ}; + req_rate = '{default:50}; + + // cache enabled ~> requests to cached region should use cache port, + // those to uncached regions should use bypass port + bypass_mem_port.set_region(0, CachedAddrBeg - 1); + data_mem_port.set_region(CachedAddrBeg, MemBytes - 1); + + runSeq(0,nWriteVectors,0); + external_writer(int'(max_size),int'(!half)); + flushCache(); + tb_mem_port_t::check_mem(); + + ////////////////////////////////////////////// + test_name = "TEST 25 -- random write on half memory(LSB) and external writer on the other half -- max size = 8b -- enabled cache + tlb, mem contentions + invalidations"; + + // Config + enable_i = 1; + tlb_rand_en = 1; + mem_rand_en = 1; + inv_rand_en = 1; + max_size = 2'b00; + half = 0; + seq_type = '{HALF_SEQ, RANDOM_SEQ, RANDOM_SEQ}; + req_rate = '{default:50}; + + // cache enabled ~> requests to cached region should use cache port, + // those to uncached regions should use bypass port + bypass_mem_port.set_region(0, CachedAddrBeg - 1); + data_mem_port.set_region(CachedAddrBeg, MemBytes - 1); + + runSeq(0,nWriteVectors,0); + external_writer(int'(max_size),int'(!half)); + flushCache(); + tb_mem_port_t::check_mem(); + + ////////////////////////////////////////////// + test_name = "TEST 26 -- random write on half memory(MSB) and external writer on the other half -- max size = 8b -- enabled cache + tlb, mem contentions + invalidations"; + + // Config + enable_i = 1; + tlb_rand_en = 1; + mem_rand_en = 1; + inv_rand_en = 1; + max_size = 2'b00; + half = 1; + seq_type = '{HALF_SEQ, RANDOM_SEQ, RANDOM_SEQ}; + req_rate = '{default:50}; + + // cache enabled ~> requests to cached region should use cache port, + // those to uncached regions should use bypass port + bypass_mem_port.set_region(0, CachedAddrBeg - 1); + data_mem_port.set_region(CachedAddrBeg, MemBytes - 1); + + runSeq(0,nWriteVectors,1); + external_writer(int'(max_size),int'(!half)); + flushCache(); + tb_mem_port_t::check_mem(); + + ////////////////////////////////////////////// + end_of_sim = 1; $display("TB> end test sequences"); tb_mem_port_t::report_mem(); diff --git a/corev_apu/tb/tb_wb_dcache/tb.list b/corev_apu/tb/tb_wb_dcache/tb.list index 05180e4ee1a..7d46ce5028c 100644 --- a/corev_apu/tb/tb_wb_dcache/tb.list +++ b/corev_apu/tb/tb_wb_dcache/tb.list @@ -35,12 +35,23 @@ ../../../vendor/pulp-platform/common_cells/src/stream_demux.sv ../../../core/axi_adapter.sv ../../../common/local/util/sram.sv +../../../common/local/util/tc_sram_wrapper.sv +../../src/tech_cells_generic/src/rtl/tc_sram.sv ../../src/axi_riscv_atomics/src/axi_res_tbl.sv ../../src/axi_riscv_atomics/src/axi_riscv_amos.sv ../../src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv ../../src/axi_riscv_atomics/src/axi_riscv_lrsc.sv ../../src/axi_riscv_atomics/src/axi_riscv_atomics.sv ../../src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv +../../../common/submodules/common_cells/src/id_queue.sv +../../../common/submodules/common_cells/src/stream_fork.sv +../../../common/submodules/common_cells/src/stream_filter.sv +../../../common/submodules/common_cells/src/fall_through_register.sv +../../../common/submodules/common_cells/src/stream_register.sv +../../../common/submodules/common_cells/src/spill_register_flushable.sv +../../../common/submodules/common_cells/src/spill_register.sv +../../../common/submodules/common_cells/src/onehot_to_bin.sv +../../axi/src/axi_multicut.sv ../common/tb_dcache_pkg.sv ../common/tb_readport.sv