diff --git a/core/cache_subsystem/cache_ctrl.sv b/core/cache_subsystem/cache_ctrl.sv index 9491141650..c14aeae0f4 100644 --- a/core/cache_subsystem/cache_ctrl.sv +++ b/core/cache_subsystem/cache_ctrl.sv @@ -322,8 +322,8 @@ module cache_ctrl data_o.data[cl_offset+:CVA6Cfg.XLEN] = mem_req_q.wdata; data_o.tag = mem_req_d.tag; // ~> change the state - data_o.dirty = 1'b1; - data_o.valid = 1'b1; + data_o.dirty[cl_offset>>3+:CVA6Cfg.XLEN/8] = '1; // '1 fills the whole slice; 1'b1 would zero-extend and mark only the first byte dirty + data_o.valid = 1'b1; // got a grant ~> this is finished now if (gnt_i) begin diff --git a/core/cache_subsystem/miss_handler.sv b/core/cache_subsystem/miss_handler.sv index 6a3a84c4ae..f00ed8d5a1 100644 --- a/core/cache_subsystem/miss_handler.sv +++ b/core/cache_subsystem/miss_handler.sv @@ -177,7 +177,7 @@ module miss_handler automatic logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] evict_way, valid_way; for (int unsigned i = 0; i < CVA6Cfg.DCACHE_SET_ASSOC; i++) begin - evict_way[i] = data_i[i].valid & data_i[i].dirty; + evict_way[i] = data_i[i].valid & (|data_i[i].dirty); valid_way[i] = data_i[i].valid; end // ---------------------- @@ -287,10 +287,11 @@ module miss_handler lfsr_enable = 1'b1; evict_way_d = lfsr_oh; // do we need to write back the cache line? - if (data_i[lfsr_bin].dirty) begin + if (|data_i[lfsr_bin].dirty) begin state_d = WB_CACHELINE_MISS; evict_cl_d.tag = data_i[lfsr_bin].tag; evict_cl_d.data = data_i[lfsr_bin].data; + evict_cl_d.dirty = data_i[lfsr_bin].dirty; cnt_d = mshr_q.addr[CVA6Cfg.DCACHE_INDEX_WIDTH-1:0]; // no - we can request a cache line now end else state_d = REQ_CACHELINE; @@ -328,7 +329,7 @@ module miss_handler data_o.tag = mshr_q.addr[CVA6Cfg.DCACHE_TAG_WIDTH+CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_INDEX_WIDTH]; data_o.data = data_miss_fsm; data_o.valid = 1'b1; - data_o.dirty = 1'b0; + data_o.dirty = '0; // is this a write? 
if (mshr_q.we) begin @@ -338,7 +339,7 @@ module miss_handler if (mshr_q.be[i]) data_o.data[(cl_offset+i*8)+:8] = mshr_q.wdata[i]; end // its immediately dirty if we write - data_o.dirty = 1'b1; + data_o.dirty[cl_offset>>3+:8] = mshr_q.be; end // reset MSHR mshr_d.valid = 1'b0; @@ -359,7 +360,7 @@ module miss_handler cnt_q[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH], {{CVA6Cfg.DCACHE_OFFSET_WIDTH} {1'b0}} }; - req_fsm_miss_be = '1; + req_fsm_miss_be = evict_cl_q.dirty; req_fsm_miss_we = 1'b1; req_fsm_miss_wdata = evict_cl_q.data; diff --git a/core/cache_subsystem/std_nbdcache.sv b/core/cache_subsystem/std_nbdcache.sv index 1d7c813b59..4f6c123390 100644 --- a/core/cache_subsystem/std_nbdcache.sv +++ b/core/cache_subsystem/std_nbdcache.sv @@ -62,6 +62,10 @@ module std_nbdcache logic [(CVA6Cfg.DCACHE_LINE_WIDTH+7)/8-1:0] data; // byte enable into data array logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] vldrty; // bit enable into state array (valid for a pair of dirty/valid bits) }; + typedef struct packed { + logic [CVA6Cfg.DCACHE_LINE_WIDTH/8-1:0] dirty; + logic valid; + } vldrty_t; // ------------------------------- // Controller <-> Arbiter @@ -107,6 +111,7 @@ module std_nbdcache cache_line_t wdata_ram; cache_line_t [ CVA6Cfg.DCACHE_SET_ASSOC-1:0] rdata_ram; cl_be_t be_ram; + vldrty_t [ CVA6Cfg.DCACHE_SET_ASSOC-1:0] be_valid_dirty_ram; // Busy signals logic miss_handler_busy; @@ -245,19 +250,28 @@ module std_nbdcache // align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals. // note: if you have an SRAM that supports flat bit enables for your target technology, - // you can use it here to save the extra 4x overhead introduced by this workaround. + // you can use it here to save the extra 8x overhead (one state bit stored per byte) introduced by this workaround. 
+ logic [(CVA6Cfg.DCACHE_LINE_WIDTH+8)*CVA6Cfg.DCACHE_SET_ASSOC-1:0] dirty_wdata, dirty_rdata; for (genvar i = 0; i < CVA6Cfg.DCACHE_SET_ASSOC; i++) begin - assign dirty_wdata[8*i] = wdata_ram.dirty; - assign dirty_wdata[8*i+1] = wdata_ram.valid; - assign rdata_ram[i].dirty = dirty_rdata[8*i]; - assign rdata_ram[i].valid = dirty_rdata[8*i+1]; + for (genvar j = 0; j < CVA6Cfg.DCACHE_LINE_WIDTH / 8; j++) begin + // dirty bits assignment + assign dirty_wdata[(CVA6Cfg.DCACHE_LINE_WIDTH+8)*i+8*j] = wdata_ram.dirty[j]; + assign rdata_ram[i].dirty[j] = dirty_rdata[(CVA6Cfg.DCACHE_LINE_WIDTH+8)*i+8*j]; + end + // valid bit assignment + assign dirty_wdata[CVA6Cfg.DCACHE_LINE_WIDTH+(CVA6Cfg.DCACHE_LINE_WIDTH+8)*i] = wdata_ram.valid; + assign rdata_ram[i].valid = dirty_rdata[CVA6Cfg.DCACHE_LINE_WIDTH+(CVA6Cfg.DCACHE_LINE_WIDTH+8)*i]; + end + + // be construction for valid_dirty_sram: element i of the packed array holds way i's {valid be, per-byte dirty be}, gated by that way's vldrty enable + for (genvar i = 0; i < CVA6Cfg.DCACHE_SET_ASSOC; i++) begin + assign be_valid_dirty_ram[i] = {be_ram.vldrty[i], be_ram.data} & {(CVA6Cfg.DCACHE_LINE_WIDTH/8+1){be_ram.vldrty[i]}}; end sram #( .USER_WIDTH(1), - .DATA_WIDTH(4 * DCACHE_DIRTY_WIDTH), + .DATA_WIDTH((CVA6Cfg.DCACHE_LINE_WIDTH + 8) * CVA6Cfg.DCACHE_SET_ASSOC), .NUM_WORDS (CVA6Cfg.DCACHE_NUM_WORDS) ) valid_dirty_sram ( .clk_i (clk_i), @@ -267,7 +281,7 @@ module std_nbdcache .addr_i (addr_ram[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH]), .wuser_i('0), .wdata_i(dirty_wdata), - .be_i (be_ram.vldrty), + .be_i (be_valid_dirty_ram), .ruser_o(), .rdata_o(dirty_rdata) ); diff --git a/corev_apu/tb/common/tb_dcache_pkg.sv b/corev_apu/tb/common/tb_dcache_pkg.sv index 7584d81a86..4aa4ce6c59 100644 --- a/corev_apu/tb/common/tb_dcache_pkg.sv +++ b/corev_apu/tb/common/tb_dcache_pkg.sv @@ -36,7 +36,7 @@ package tb_pkg; parameter ERROR_CNT_STOP_LEVEL = 1; // use 1 for debugging. 0 runs the complete simulation... 
// tb_readport sequences - typedef enum logic [2:0] { RANDOM_SEQ, LINEAR_SEQ, BURST_SEQ, IDLE_SEQ, WRAP_SEQ, SET_SEQ, CONST_SEQ } seq_t; + typedef enum logic [2:0] { RANDOM_SEQ, LINEAR_SEQ, BURST_SEQ, IDLE_SEQ, WRAP_SEQ, SET_SEQ, CONST_SEQ, HALF_SEQ } seq_t; typedef enum logic [1:0] { OTHER, BYPASS, CACHED } port_type_t; diff --git a/corev_apu/tb/common/tb_writeport.sv b/corev_apu/tb/common/tb_writeport.sv index 46cb362875..6951a86480 100644 --- a/corev_apu/tb/common/tb_writeport.sv +++ b/corev_apu/tb/common/tb_writeport.sv @@ -33,6 +33,8 @@ program tb_writeport import tb_pkg::*; import ariane_pkg::*; #( input logic rst_ni, // to testbench master + input logic half_i, + input logic [1:0] max_size_i, ref string test_name_i, input logic [6:0] req_rate_i, input seq_t seq_type_i, @@ -66,13 +68,13 @@ program tb_writeport import tb_pkg::*; import ariane_pkg::*; #( automatic logic [CVA6Cfg.XLEN/8-1:0] be; automatic logic [1:0] size; - void'(randomize(size) with {size <= $clog2(CVA6Cfg.XLEN/8);}); + void'(randomize(size) with {size >= 2'b00; size <= max_size_i; size <= $clog2(CVA6Cfg.XLEN/8);}); // align to size, set correct byte enables be = '0; unique case(size) - 2'b00: be[paddr[2:0] +: 1] = '1; - 2'b01: be[paddr[2:1]<<1 +: 2] = '1; - 2'b10: be[paddr[2:2]<<2 +: 4] = '1; + 2'b00: be[int'(paddr[2:0]) +: 1] = '1; + 2'b01: be[int'(paddr[2:1]<<1) +: 2] = '1; + 2'b10: be[int'(paddr[2:2]<<2) +: 4] = '1; 2'b11: be = '1; default: ; endcase @@ -112,6 +114,7 @@ program tb_writeport import tb_pkg::*; import ariane_pkg::*; #( dut_req_port_o.data_req = 1'b1; // generate random address void'(randomize(paddr) with {paddr >= 0; paddr < (MemWords<<$clog2(CVA6Cfg.XLEN/8));}); + if (seq_type_i == HALF_SEQ) paddr[int'(max_size_i)] = half_i; applyRandData(); `APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt) end @@ -281,6 +284,11 @@ program tb_writeport import tb_pkg::*; import ariane_pkg::*; #( $display("%s> start random sequence with %04d vectors and req_rate %03d", PortName, 
seq_num_vect_i, req_rate_i); genRandReq(); end + HALF_SEQ: begin + $display("%s> start half random sequence with %04d vectors and req_rate %03d", PortName, seq_num_vect_i, req_rate_i); + $display("%s> half = %b and max size = %b", PortName, half_i, max_size_i); + genRandReq(); + end LINEAR_SEQ: begin $display("%s> start linear sequence with %04d vectors and req_rate %03d", PortName, seq_num_vect_i, req_rate_i); genSeqWrite(); diff --git a/corev_apu/tb/tb_wb_dcache/Makefile b/corev_apu/tb/tb_wb_dcache/Makefile index 81c6d411cd..de6df0fafe 100755 --- a/corev_apu/tb/tb_wb_dcache/Makefile +++ b/corev_apu/tb/tb_wb_dcache/Makefile @@ -20,7 +20,7 @@ src := $(shell xargs printf '\n%s' < $(src-list) | cut -b 1-) compile_flag += +cover+i_dut -incr -64 -nologo -svinputport=compat -override_timescale 1ns/1ps -suppress 2583 -suppress 13262 -suppress 2986 +cover sim_opts += -64 -coverage -classdebug -voptargs="+acc" questa_version ?= ${QUESTASIM_VERSION} -incdir += ../common/ ../../../vendor/pulp-platform/axi/include/ +incdir += ../common/ ../../../vendor/pulp-platform/axi/include/ ../../../vendor/pulp-platform/common_cells/include/ # Iterate over all include directories and write them with +incdir+ prefixed # +incdir+ works for Verilator and QuestaSim diff --git a/corev_apu/tb/tb_wb_dcache/hdl/tb.sv b/corev_apu/tb/tb_wb_dcache/hdl/tb.sv index 37e992c220..c8522316b0 100644 --- a/corev_apu/tb/tb_wb_dcache/hdl/tb.sv +++ b/corev_apu/tb/tb_wb_dcache/hdl/tb.sv @@ -118,6 +118,8 @@ module tb import ariane_pkg::*; import std_cache_pkg::*; import tb_pkg::*; #( seq_t [2:0] seq_type; logic [3:0] seq_done; logic [6:0] req_rate[2:0]; + logic half; + logic [1:0] max_size; logic seq_run, seq_last; logic end_of_sim; @@ -259,6 +261,37 @@ module tb import ariane_pkg::*; import std_cache_pkg::*; import tb_pkg::*; #( `APPL_WAIT_CYC(clk_i, 1) endtask : flushCache + //integer fd = $fopen("extern_write.txt","w"); + // Write directly the tb memory + function automatic void external_writer(int unsigned 
pos, int unsigned half); + automatic logic[7:0] val; + for (int k=0; k requests to cached region should use cache port, + // those to uncached regions should use bypass port + bypass_mem_port.set_region(0, CachedAddrBeg - 1); + data_mem_port.set_region(CachedAddrBeg, MemBytes - 1); + + runSeq(0,nWriteVectors,0); + external_writer(int'(max_size),int'(!half)); + flushCache(); + tb_mem_port_t::check_mem(); + + ////////////////////////////////////////////// + test_name = "TEST 20 -- random write on half memory(MSB) and external writer on the other half -- max size = 64b -- enabled cache + tlb, mem contentions + invalidations"; + + // Config + enable_i = 1; + tlb_rand_en = 1; + mem_rand_en = 1; + inv_rand_en = 1; + max_size = 2'b11; + half = 1; + seq_type = '{HALF_SEQ, RANDOM_SEQ, RANDOM_SEQ}; + req_rate = '{default:50}; + + // cache enabled ~> requests to cached region should use cache port, + // those to uncached regions should use bypass port + bypass_mem_port.set_region(0, CachedAddrBeg - 1); + data_mem_port.set_region(CachedAddrBeg, MemBytes - 1); + + runSeq(0,nWriteVectors,0); + external_writer(int'(max_size),int'(!half)); + flushCache(); + tb_mem_port_t::check_mem(); + + ////////////////////////////////////////////// + test_name = "TEST 21 -- random write on half memory(LSB) and external writer on the other half -- max size = 32b -- enabled cache + tlb, mem contentions + invalidations"; + + // Config + enable_i = 1; + tlb_rand_en = 1; + mem_rand_en = 1; + inv_rand_en = 1; + max_size = 2'b10; + half = 0; + seq_type = '{HALF_SEQ, RANDOM_SEQ, RANDOM_SEQ}; + req_rate = '{default:50}; + + // cache enabled ~> requests to cached region should use cache port, + // those to uncached regions should use bypass port + bypass_mem_port.set_region(0, CachedAddrBeg - 1); + data_mem_port.set_region(CachedAddrBeg, MemBytes - 1); + + runSeq(0,nWriteVectors,0); + external_writer(int'(max_size),int'(!half)); + flushCache(); + tb_mem_port_t::check_mem(); + + 
////////////////////////////////////////////// + test_name = "TEST 22 -- random write on half memory(MSB) and external writer on the other half -- max size = 32b -- enabled cache + tlb, mem contentions + invalidations"; + + // Config + enable_i = 1; + tlb_rand_en = 1; + mem_rand_en = 1; + inv_rand_en = 1; + max_size = 2'b10; + half = 1; + seq_type = '{HALF_SEQ, RANDOM_SEQ, RANDOM_SEQ}; + req_rate = '{default:50}; + + // cache enabled ~> requests to cached region should use cache port, + // those to uncached regions should use bypass port + bypass_mem_port.set_region(0, CachedAddrBeg - 1); + data_mem_port.set_region(CachedAddrBeg, MemBytes - 1); + + runSeq(0,nWriteVectors,0); + external_writer(int'(max_size),int'(!half)); + flushCache(); + tb_mem_port_t::check_mem(); + + ////////////////////////////////////////////// + test_name = "TEST 23 -- random write on half memory(LSB) and external writer on the other half -- max size = 16b -- enabled cache + tlb, mem contentions + invalidations"; + + // Config + enable_i = 1; + tlb_rand_en = 1; + mem_rand_en = 1; + inv_rand_en = 1; + max_size = 2'b01; + half = 0; + seq_type = '{HALF_SEQ, RANDOM_SEQ, RANDOM_SEQ}; + req_rate = '{default:50}; + + // cache enabled ~> requests to cached region should use cache port, + // those to uncached regions should use bypass port + bypass_mem_port.set_region(0, CachedAddrBeg - 1); + data_mem_port.set_region(CachedAddrBeg, MemBytes - 1); + + runSeq(0,nWriteVectors,0); + external_writer(int'(max_size),int'(!half)); + flushCache(); + tb_mem_port_t::check_mem(); + + ////////////////////////////////////////////// + test_name = "TEST 24 -- random write on half memory(MSB) and external writer on the other half -- max size = 16b -- enabled cache + tlb, mem contentions + invalidations"; + + // Config + enable_i = 1; + tlb_rand_en = 1; + mem_rand_en = 1; + inv_rand_en = 1; + max_size = 2'b01; + half = 1; + seq_type = '{HALF_SEQ, RANDOM_SEQ, RANDOM_SEQ}; + req_rate = '{default:50}; + + // cache enabled 
~> requests to cached region should use cache port, + // those to uncached regions should use bypass port + bypass_mem_port.set_region(0, CachedAddrBeg - 1); + data_mem_port.set_region(CachedAddrBeg, MemBytes - 1); + + runSeq(0,nWriteVectors,0); + external_writer(int'(max_size),int'(!half)); + flushCache(); + tb_mem_port_t::check_mem(); + + ////////////////////////////////////////////// + test_name = "TEST 25 -- random write on half memory(LSB) and external writer on the other half -- max size = 8b -- enabled cache + tlb, mem contentions + invalidations"; + + // Config + enable_i = 1; + tlb_rand_en = 1; + mem_rand_en = 1; + inv_rand_en = 1; + max_size = 2'b00; + half = 0; + seq_type = '{HALF_SEQ, RANDOM_SEQ, RANDOM_SEQ}; + req_rate = '{default:50}; + + // cache enabled ~> requests to cached region should use cache port, + // those to uncached regions should use bypass port + bypass_mem_port.set_region(0, CachedAddrBeg - 1); + data_mem_port.set_region(CachedAddrBeg, MemBytes - 1); + + runSeq(0,nWriteVectors,0); + external_writer(int'(max_size),int'(!half)); + flushCache(); + tb_mem_port_t::check_mem(); + + ////////////////////////////////////////////// + test_name = "TEST 26 -- random write on half memory(MSB) and external writer on the other half -- max size = 8b -- enabled cache + tlb, mem contentions + invalidations"; + + // Config + enable_i = 1; + tlb_rand_en = 1; + mem_rand_en = 1; + inv_rand_en = 1; + max_size = 2'b00; + half = 1; + seq_type = '{HALF_SEQ, RANDOM_SEQ, RANDOM_SEQ}; + req_rate = '{default:50}; + + // cache enabled ~> requests to cached region should use cache port, + // those to uncached regions should use bypass port + bypass_mem_port.set_region(0, CachedAddrBeg - 1); + data_mem_port.set_region(CachedAddrBeg, MemBytes - 1); + + runSeq(0,nWriteVectors,1); + external_writer(int'(max_size),int'(!half)); + flushCache(); + tb_mem_port_t::check_mem(); + + ////////////////////////////////////////////// + end_of_sim = 1; $display("TB> end test 
sequences"); tb_mem_port_t::report_mem(); diff --git a/corev_apu/tb/tb_wb_dcache/tb.list b/corev_apu/tb/tb_wb_dcache/tb.list index d08c0adaef..dbda19fedc 100644 --- a/corev_apu/tb/tb_wb_dcache/tb.list +++ b/corev_apu/tb/tb_wb_dcache/tb.list @@ -39,12 +39,23 @@ hdl/cv64a6_config_pkg.sv ../../../vendor/pulp-platform/common_cells/src/stream_demux.sv ../../../core/cache_subsystem/axi_adapter.sv ../../../common/local/util/sram.sv +../../../common/local/util/tc_sram_wrapper.sv +../../src/tech_cells_generic/src/rtl/tc_sram.sv ../../src/axi_riscv_atomics/src/axi_res_tbl.sv ../../src/axi_riscv_atomics/src/axi_riscv_amos.sv ../../src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv ../../src/axi_riscv_atomics/src/axi_riscv_lrsc.sv ../../src/axi_riscv_atomics/src/axi_riscv_atomics.sv ../../src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv +../../../common/submodules/common_cells/src/id_queue.sv +../../../common/submodules/common_cells/src/stream_fork.sv +../../../common/submodules/common_cells/src/stream_filter.sv +../../../common/submodules/common_cells/src/fall_through_register.sv +../../../common/submodules/common_cells/src/stream_register.sv +../../../common/submodules/common_cells/src/spill_register_flushable.sv +../../../common/submodules/common_cells/src/spill_register.sv +../../../common/submodules/common_cells/src/onehot_to_bin.sv +../../axi/src/axi_multicut.sv ../common/tb_dcache_pkg.sv ../common/tb_readport.sv