From d573fb9e332e0ccf873ee85fd569f4517f18f031 Mon Sep 17 00:00:00 2001 From: xinhecuican Date: Fri, 3 Jan 2025 10:56:18 +0800 Subject: [PATCH 1/3] chore: append perf counters --- src/core/backend/issue/CsrIssueQueue.sv | 2 + src/core/backend/lsu/dcache/DCacheMiss.sv | 1 + src/core/frontend/BranchPredictor.sv | 14 +- src/core/frontend/FSQ.sv | 9 +- src/core/frontend/HistoryControl.sv | 28 ++- src/core/frontend/RAS.sv | 230 +--------------------- src/core/frontend/UBTB.sv | 2 + src/core/mmu/PTW.sv | 2 + src/defines/arch.svh | 1 - src/defines/bundles.svh | 15 +- src/defines/global.svh | 5 +- 11 files changed, 58 insertions(+), 251 deletions(-) diff --git a/src/core/backend/issue/CsrIssueQueue.sv b/src/core/backend/issue/CsrIssueQueue.sv index f36b503..c49a88b 100644 --- a/src/core/backend/issue/CsrIssueQueue.sv +++ b/src/core/backend/issue/CsrIssueQueue.sv @@ -329,4 +329,6 @@ generate end end endgenerate + + `PERF(redirect_fence, fenceBus.fence_end) endmodule \ No newline at end of file diff --git a/src/core/backend/lsu/dcache/DCacheMiss.sv b/src/core/backend/lsu/dcache/DCacheMiss.sv index 2ff9567..99607ff 100644 --- a/src/core/backend/lsu/dcache/DCacheMiss.sv +++ b/src/core/backend/lsu/dcache/DCacheMiss.sv @@ -474,5 +474,6 @@ endgenerate assign r_axi_io.r_ready = 1'b1; `PERF(load_miss, rlast & (|mshr_hit_combine)) + `PERF(dcache_miss, r_axi_io.ar_valid & r_axi_io.ar_ready) endmodule \ No newline at end of file diff --git a/src/core/frontend/BranchPredictor.sv b/src/core/frontend/BranchPredictor.sv index 76ac357..f0e0209 100644 --- a/src/core/frontend/BranchPredictor.sv +++ b/src/core/frontend/BranchPredictor.sv @@ -30,6 +30,7 @@ module BranchPredictor( RasRedirectInfo ras_info_s3; logic ras_valid_s2, ras_valid_s3_in, ras_valid_s3_out; BTBUpdateInfo entry_s2; + logic `N(`SLOT_NUM) tage_pred_s3; assign squash = bpu_fsq_io.squash; assign squashInfo = bpu_fsq_io.squashInfo; @@ -40,10 +41,12 @@ module BranchPredictor( BTB btb(.*); assign tage_io.pc = pc; Tage tage(.*); 
+`ifdef FEAT_SC assign sc_io.pc = pc; assign sc_io.tage_prediction = tage_io.prediction; assign sc_io.tage_ctrs = tage_io.provider_ctr; SC sc(.*, .io(sc_io)); +`endif assign ittage_io.pc = pc; `ifdef FEAT_ITTAGE_REGION assign ittage_io.region_idx = entry_s2.tailSlot.target[`ITTAGE_REGION_WIDTH-1: 0]; @@ -77,11 +80,7 @@ module BranchPredictor( assign ras_io.lastStage = s3_result_out.en & ~redirect.flush; assign ras_io.lastStageIdx = s3_result_out.stream_idx; `endif -`ifdef FEAT_LINKRAS - LinkRAS ras(.*); -`else RAS ras(.*); -`endif assign redirect.s2_redirect = s2_result_out.en && s2_result_out.redirect; assign redirect.s3_redirect = s3_result_out.en && s3_result_out.redirect; @@ -132,6 +131,7 @@ module BranchPredictor( end ras_addr_s3 <= ras_io.entry.pc; ras_info_s3 <= ras_io.rasInfo; + tage_pred_s3 <= tage_io.prediction; end assign bpu_fsq_io.en = bpu_fsq_io.prediction.en & ~redirect.flush; @@ -185,13 +185,19 @@ module BranchPredictor( s2_meta_out = s2_meta_in; s2_meta_out.tage = tage_io.meta; s3_meta_out = s3_meta_in; +`ifdef FEAT_SC s3_meta_out.sc = sc_io.meta; +`endif s3_meta_out.ittage = ittage_io.meta; end S3Control s3_control( .pc(s3_result_in.stream.start_addr), +`ifdef FEAT_SC .prediction(sc_io.prediction), +`else + .prediction(tage_pred_s3), +`endif .ras_addr(ras_addr_s3), .ind_addr(ittage_io.target), .ras_info(ras_info_s3), diff --git a/src/core/frontend/FSQ.sv b/src/core/frontend/FSQ.sv index daa8f7a..bbb4b7a 100644 --- a/src/core/frontend/FSQ.sv +++ b/src/core/frontend/FSQ.sv @@ -605,7 +605,7 @@ endgenerate update_tail_taken <= pred_error ? (commitWBInfo.br_type != CONDITION) & ~commitWBInfo.exception : ~(|u_predInfo.condHist) & commitStream.taken; update_indirect <= pred_error ? 
commitWBInfo.br_type == INDIRECT || commitWBInfo.br_type == INDIRECT_CALL : - ~(|u_predInfo.condHist) & commitStream.taken & (oldEntry.tailSlot.br_type == INDIRECT) | (oldEntry.tailSlot.br_type == INDIRECT_CALL); + ~(|u_predInfo.condHist) & commitStream.taken & ((oldEntry.tailSlot.br_type == INDIRECT) | (oldEntry.tailSlot.br_type == INDIRECT_CALL)); end always_comb begin update_btb_entry = update_btb_entry_pre; @@ -616,7 +616,7 @@ endgenerate `endif end - logic `N(`FSQ_WIDTH) exception_head, exception_head_n; + logic `N(`FSQ_WIDTH) exception_head, exception_head_n, exception_head_pre; logic `ARRAY(`COMMIT_WIDTH, `FSQ_WIDTH) commitFsqIdx; logic commit_exc_valid; logic `N(`COMMIT_WIDTH) commit_older; @@ -639,6 +639,7 @@ endgenerate exc_widx <= pd_redirect.fsqIdx.idx; exc_waddr <= pd_redirect.stream.start_addr; pd_size <= pd_redirect.size; + exception_head_pre <= exception_head; end always_ff @(posedge clk, posedge rst)begin if(rst == `RST)begin @@ -693,7 +694,7 @@ endgenerate .ready() ); logic `N($clog2(`COMMIT_WIDTH)) exc_stream_idx; - assign exc_stream_idx = commitBus.fsqInfo[0].idx - exception_head; + assign exc_stream_idx = commitBus.fsqInfo[0].idx - exception_head_pre; assign fsq_back_io.commitStreamSize = exception_addrs[exc_stream_idx][`VADDR_SIZE +: `PREDICTION_WIDTH]; `ifdef RVC @@ -736,7 +737,7 @@ endgenerate `endif logic `N($clog2(`COMMIT_WIDTH)) exc_ridx; - assign exc_ridx = fsq_back_io.redirect.fsqInfo.idx - exception_head; + assign exc_ridx = fsq_back_io.redirect.fsqInfo.idx - exception_head_pre; assign fsq_back_io.exc_pc = exception_addrs[exc_ridx][`VADDR_SIZE-1: 0] + {fsq_back_io.redirect.fsqInfo.offset, {`INST_OFFSET{1'b0}}}; `ifdef DIFFTEST diff --git a/src/core/frontend/HistoryControl.sv b/src/core/frontend/HistoryControl.sv index 209dc15..4b4824b 100644 --- a/src/core/frontend/HistoryControl.sv +++ b/src/core/frontend/HistoryControl.sv @@ -12,7 +12,9 @@ module HistoryControl( logic `N(`GHIST_SIZE) ghist; TageFoldHistory tage_history, 
tage_input_history, tage_update_history; logic `N(`SC_GHIST_WIDTH) sc_ghist, sc_ghist_red; +`ifdef IMLI_VALID logic `N(`SC_IMLI_WIDTH) imli, imli_red; +`endif logic `N(`GHIST_WIDTH) pos; logic `ARRAY(`SLOT_NUM, `GHIST_WIDTH) we_idx; logic `N(`SLOT_NUM) ghist_we; @@ -36,7 +38,9 @@ module HistoryControl( assign we_idx[1] = redirect.flush ? squashInfo.redirectInfo.ghistIdx + 1 : result.redirect_info.ghistIdx + 1; assign sc_ghist_red = result.en & result.redirect ? result.redirect_info.sc_ghist : sc_ghist; +`ifdef IMLI_VALID assign imli_red = result.en & result.redirect ? result.redirect_info.imli : imli; +`endif generate for(genvar i=0; i<`SLOT_NUM; i++)begin assign cond_result[i] = (condNum == (i+1)) & taken; @@ -47,7 +51,9 @@ endgenerate assign history.ghistIdx = pos; assign history.tage_history = tage_history; assign history.sc_ghist = sc_ghist; +`ifdef IMLI_VALID assign history.imli = imli; +`endif localparam [`TAGE_BANK*16-1: 0] tage_hist_length = `TAGE_HIST_LENGTH; generate; for(genvar i=0; i<`TAGE_BANK; i++)begin @@ -99,7 +105,9 @@ endgenerate pos <= 0; tage_history <= 0; sc_ghist <= 0; +`ifdef IMLI_VALID imli <= 0; +`endif end else begin pos <= squash ? squashCondNum + squashInfo.redirectInfo.ghistIdx : @@ -116,26 +124,40 @@ endgenerate if(squash)begin if(squashCondNum == 2)begin sc_ghist <= {squashInfo.redirectInfo.sc_ghist[`SC_GHIST_WIDTH-3: 0], 1'b0, squashInfo.predInfo.taken}; - imli <= squashInfo.predInfo.taken; end else if(squashCondNum == 1)begin sc_ghist <= {squashInfo.redirectInfo.sc_ghist[`SC_GHIST_WIDTH-2: 0], squashInfo.predInfo.taken}; - imli <= squashInfo.predInfo.taken ? squashInfo.redirectInfo.imli + 1 : 0; end else begin sc_ghist <= squashInfo.redirectInfo.sc_ghist; + end +`ifdef IMLI_VALID + if(squashCondNum == 2)begin + imli <= squashInfo.predInfo.taken; + end + else if(squashCondNum == 1)begin + imli <= squashInfo.predInfo.taken ? 
squashInfo.redirectInfo.imli + 1 : 0; + end + else begin imli <= squashInfo.redirectInfo.imli; end +`endif end else if(result.en)begin if(result.cond_num == 2)begin sc_ghist <= {sc_ghist_red[`SC_GHIST_WIDTH-3: 0], 1'b0, |result.predTaken}; - imli <= |result.predTaken; end else if(result.cond_num == 1)begin sc_ghist <= {sc_ghist_red[`SC_GHIST_WIDTH-2: 0], |result.predTaken}; + end +`ifdef IMLI_VALID + if(result.cond_num == 2)begin /* IMLI update lives in its own IMLI_VALID-guarded chain, separate from sc_ghist */ + imli <= |result.predTaken; + end + else if(result.cond_num == 1)begin imli <= |result.predTaken ? imli_red + 1 : 0; end +`endif end end end diff --git a/src/core/frontend/RAS.sv b/src/core/frontend/RAS.sv index 247063e..fde261c 100644 --- a/src/core/frontend/RAS.sv +++ b/src/core/frontend/RAS.sv @@ -5,8 +5,7 @@ // 一种方法为在commit时写入来修复错误,但是在squash后commit前这一段时间读取会导致错误 // 并且commit会影响推测更新的结果 // 下面是另一种方法,记录所有inflight的push地址,这样当redirect时不会因为pop-push操作导致覆盖 -`ifdef FEAT_LINKRAS -module LinkRAS( +module RAS( input logic clk, input logic rst, BpuRASIO.ras ras_io @@ -251,229 +250,4 @@ module LinkRAS( $sformatf("ras commit top mismatch")) `endif `endif -endmodule -`else -module RAS( - input logic clk, - input logic rst, - BpuRASIO.ras ras_io -); - logic `N(`RAS_WIDTH) top, top_p1, top_n1; - logic `N(`RAS_WIDTH) bottom, bottom_n1, redirect_bottom_n1; - logic `N(`RAS_WIDTH) redirect_p1, redirect_n1; - logic `N(`RAS_WIDTH) commit_top; - logic bdir, tdir, bdir_n, rbdir_n; - logic `N(`RAS_WIDTH) waddr, commit_waddr; - logic `N(`RAS_SIZE) speculate, spec_pop, spec_push, speculate_mask; - logic `N(`RAS_SIZE) redirect_valid_mask, redirect_top_mask, redirect_bottom_mask; - logic `N(`RAS_SIZE) commit_mask; - RasEntry entry, updateEntry, commitUpdateEntry, commitEntry; - logic [1: 0] squashType; - RasRedirectInfo r; - BTBUpdateInfo btbEntry; - logic we, commit_we; - logic commit_update; - logic full, empty, redirect_full, redirect_empty; - logic `VADDR_BUS squash_target; - - assign top_p1 = top - 1; - assign top_n1 = top + 1; - assign redirect_p1 = r.rasTop - 1; 
- assign redirect_n1 = r.rasTop + 1; - LoopAdder #(`RAS_WIDTH, 1) adder_bottom(1'b1, {bottom, bdir}, {bottom_n1, bdir_n}); - LoopAdder #(`RAS_WIDTH, 1) adder_rbottom(1'b1, {r.rasBottom, r.ras_bdir}, {redirect_bottom_n1, rbdir_n}); - assign full = (bdir ^ tdir) & (top == bottom); - assign empty = (bdir == tdir) & (top == bottom); - assign r = ras_io.squashInfo.redirectInfo.rasInfo; - assign btbEntry = ras_io.updateInfo.btbEntry; - assign redirect_full = (r.ras_bdir ^ r.ras_tdir) & (r.rasTop == r.rasBottom); - assign redirect_empty = (r.ras_bdir == r.ras_tdir) & (r.rasTop == r.rasBottom); - MaskGen #(`RAS_SIZE) mask_gen_top (r.rasTop, redirect_top_mask); - MaskGen #(`RAS_SIZE) mask_gen_bottom (r.rasBottom, redirect_bottom_mask); - Decoder #(`RAS_SIZE) decoder_commit_top (commit_top, commit_mask); - assign redirect_valid_mask = redirect_full ? {`RAS_SIZE{1'b1}} : - redirect_empty ? {`RAS_SIZE{1'b0}} : - redirect_top_mask ^ redirect_bottom_mask ^ {`RAS_SIZE{r.ras_bdir ^ r.ras_tdir}}; - assign speculate_mask = redirect_valid_mask & spec_pop & spec_push; - assign waddr = ras_io.squash && squashType == POP_PUSH ? r.rasTop - 1 : - ras_io.squash ? r.rasTop : - ras_io.ras_type == POP_PUSH ? top_p1 : top; - assign commit_waddr = btbEntry.tailSlot.ras_type == POP_PUSH ? commit_top - 1 : commit_top; - -`ifdef RVC - assign squash_target = ras_io.squashInfo.start_addr + {ras_io.squashInfo.offset, {`INST_OFFSET{1'b0}}} + {~ras_io.squashInfo.rvc, ras_io.squashInfo.rvc, 1'b0}; - assign commitUpdateEntry.pc = ras_io.updateInfo.start_addr + {btbEntry.tailSlot.offset, {`INST_OFFSET{1'b0}}} + {~btbEntry.tailSlot.rvc, btbEntry.tailSlot.rvc, 1'b0}; -`else - assign squash_target = ras_io.squashInfo.start_addr + {ras_io.squashInfo.offset, {`INST_OFFSET{1'b0}}} + 4; - assign commitUpdateEntry.pc = ras_io.updateInfo.start_addr + {btbEntry.tailSlot.offset, {`INST_OFFSET{1'b0}}} + 4; -`endif - assign updateEntry.pc = ras_io.squash ? 
squash_target : ras_io.target; - assign squashType = ras_io.squashInfo.ras_type; - assign we = ~ras_io.squash & ras_io.request & ras_io.ras_type[1] | - ras_io.squash & squashType[1]; - assign commit_we = ras_io.update & ras_io.updateInfo.tailTaken & - (btbEntry.tailSlot.br_type == CALL) & - btbEntry.tailSlot.ras_type[1]; - assign commit_update = ras_io.update & ras_io.updateInfo.tailTaken & (btbEntry.tailSlot.br_type == CALL); - - - assign ras_io.en = ~empty; - assign ras_io.rasInfo.rasTop = top; - assign ras_io.rasInfo.ras_tdir = tdir; - assign ras_io.rasInfo.rasBottom = bottom; - assign ras_io.rasInfo.ras_bdir = bdir; - assign ras_io.entry = speculate[top_p1] & ~spec_push[top_p1] ? commitEntry : entry; - - MPRAM #( - .WIDTH($bits(RasEntry)), - .DEPTH(`RAS_SIZE), - .READ_PORT(1), - .WRITE_PORT(1), - .READ_LATENCY(0) - ) ras ( - .clk(clk), - .rst(rst), - .rst_sync(0), - .en(1'b1), - .we(we), - .waddr(waddr), - .raddr(top_p1), - .wdata(updateEntry), - .rdata(entry), - .ready() - ); - - MPRAM #( - .WIDTH($bits(RasEntry)), - .DEPTH(`RAS_SIZE), - .READ_PORT(1), - .WRITE_PORT(1), - .READ_LATENCY(0) - ) commit_ras ( - .clk, - .rst, - .rst_sync(0), - .en(1'b1), - .we(commit_we), - .waddr(commit_waddr), - .raddr(top_p1), - .wdata(commitUpdateEntry), - .rdata(commitEntry), - .ready() - ); - - always_ff @(posedge clk or posedge rst)begin - if(rst == `RST)begin - top <= 0; - bottom <= 0; - bdir <= 0; - tdir <= 0; - speculate <= 0; - spec_pop <= 0; - spec_push <= 0; - commit_top <= 0; - end - else begin - if(ras_io.squash)begin - if(!ras_io.squashInfo.squash_front)begin - spec_pop <= 0; - spec_push <= 0; - end - if(squashType == POP && !redirect_empty)begin - top <= r.rasTop - 1; - tdir <= redirect_p1[`RAS_WIDTH-1] & ~r.rasTop[`RAS_WIDTH-1] ? ~r.ras_tdir : r.ras_bdir; - end - else if(squashType == PUSH)begin - top <= r.rasTop + 1; - tdir <= r.rasTop[`RAS_WIDTH-1] & ~redirect_n1[`RAS_WIDTH-1] ? 
~r.ras_tdir : r.ras_bdir; - end - else begin - top <= r.rasTop; - tdir <= r.ras_tdir; - end - if(squashType == PUSH && redirect_full)begin - bottom <= redirect_bottom_n1; - bdir <= rbdir_n; - end - else begin - bottom <= r.rasBottom; - bdir <= r.ras_bdir; - end - end - else if(ras_io.request)begin - if(ras_io.ras_type == POP && !empty)begin - top <= top_p1; - tdir <= top_p1[`RAS_WIDTH-1] & ~top[`RAS_WIDTH-1] ? ~tdir : tdir; - end - else if(ras_io.ras_type == PUSH)begin - top <= top_n1; - tdir <= top[`RAS_WIDTH-1] & ~top_n1[`RAS_WIDTH-1] ? ~tdir : tdir; - if(full)begin - bottom <= bottom_n1; - bdir <= bdir_n; - end - end - end - - if(ras_io.squashInfo.squash_front & squashType[0] & ~redirect_empty)begin - spec_pop[redirect_p1] <= 1'b1; - end - else if(ras_io.request & ras_io.ras_type[0] & ~empty)begin - spec_pop[top_p1] <= 1'b1; - end - - if(ras_io.squashInfo.squash_front & squashType[1])begin - if(squashType[0])begin - spec_push[redirect_p1] <= 1'b1; - end - else begin - spec_push[r.rasTop] <= 1'b1; - end - end - else if(ras_io.request & ras_io.ras_type[1])begin - if(ras_io.ras_type[0])begin - spec_push[top_p1] <= 1'b1; - end - else begin - spec_push[top] <= 1'b1; - end - end - - for(int i=0; i<`RAS_SIZE; i++)begin - speculate[i] <= (speculate[i] | speculate_mask[i] & ras_io.squash & ~ras_io.squashInfo.squash_front) & - ~(commit_we & commit_mask[i]); - end - - if(commit_update)begin - if(btbEntry.tailSlot.ras_type == POP)begin - commit_top <= commit_top - 1; - end - else if(btbEntry.tailSlot.ras_type == PUSH)begin - commit_top <= commit_top + 1; - end - end - end - end - - `Log(DLog::Debug, T_RAS, ~ras_io.squash & ras_io.request & ras_io.ras_type != NONE, - $sformatf("ras lookup. %d %d %b %b %h %h", top, bottom, tdir, bdir, ras_io.target, ras_io.ras_type)) - `Log(DLog::Debug, T_RAS, ras_io.squash & squashType != NONE, - $sformatf("ras squash. 
%h %h", squash_target, squashType)) - -`ifdef T_DEBUG - logic `ARRAY(`FSQ_SIZE, `RAS_WIDTH) lookup_idx; - logic update_n; - logic `N(`FSQ_WIDTH) fsqIdx; - always_ff @(posedge clk)begin - update_n <= ras_io.update; - fsqIdx <= ras_io.updateInfo.fsqIdx; - if(ras_io.lastStage)begin - lookup_idx[ras_io.lastStageIdx] <= top; - end - end - `Log(DLog::Debug, T_DEBUG, update_n && (commit_top != lookup_idx[fsqIdx]), - $sformatf("ras commit top mismatch")) -`endif - -endmodule -`endif \ No newline at end of file +endmodule \ No newline at end of file diff --git a/src/core/frontend/UBTB.sv b/src/core/frontend/UBTB.sv index a82156f..3693faf 100644 --- a/src/core/frontend/UBTB.sv +++ b/src/core/frontend/UBTB.sv @@ -74,7 +74,9 @@ endgenerate result_i.redirect_info.tage_history = ubtb_io.history.tage_history; result_i.redirect_info.rasInfo = 0; result_i.redirect_info.sc_ghist = ubtb_io.history.sc_ghist; +`ifdef IMLI_VALID result_i.redirect_info.imli = ubtb_io.history.imli; +`endif result_i.stream.target = ubtb_io.pc + `BLOCK_SIZE; `ifdef RVC result_i.stream.size = `BLOCK_INST_SIZE - 2; diff --git a/src/core/mmu/PTW.sv b/src/core/mmu/PTW.sv index 301d698..87a455d 100644 --- a/src/core/mmu/PTW.sv +++ b/src/core/mmu/PTW.sv @@ -349,6 +349,8 @@ module PTW( endcase end end + + `PERF(ptw_miss, axi_io.ar_valid & axi_io.ar_ready) endmodule interface PTBufferIO #( diff --git a/src/defines/arch.svh b/src/defines/arch.svh index 8945762..4212859 100644 --- a/src/defines/arch.svh +++ b/src/defines/arch.svh @@ -10,7 +10,6 @@ `define EXT_FENCEI -`define FEAT_LINKRAS `define FEAT_SC `define FEAT_ITTAGE_REGION diff --git a/src/defines/bundles.svh b/src/defines/bundles.svh index 1a0aa21..e4e438c 100644 --- a/src/defines/bundles.svh +++ b/src/defines/bundles.svh @@ -105,7 +105,9 @@ typedef struct packed { typedef struct packed { TageMeta tage; UBTBMeta ubtb; +`ifdef FEAT_SC SCMeta sc; +`endif ITTageMeta ittage; } PredictionMeta; @@ -130,28 +132,21 @@ typedef struct packed { logic `ARRAY(`TAGE_BANK, 
`TAGE_TAG_COMPRESS2) fold_tag2; } TageFoldHistory; -`ifdef FEAT_LINKRAS typedef struct packed { RasInflightIdx rasTop; RasInflightIdx listTop; logic `N(`RAS_WIDTH) inflightTop; logic topInvalid; } RasRedirectInfo; -`else -typedef struct packed { - logic `N(`RAS_WIDTH) rasTop; - logic `N(`RAS_WIDTH) rasBottom; - logic ras_tdir; - logic ras_bdir; -} RasRedirectInfo; -`endif typedef struct packed { logic `N(`GHIST_WIDTH) ghistIdx; TageFoldHistory tage_history; RasRedirectInfo rasInfo; logic `N(`SC_GHIST_WIDTH) sc_ghist; +`ifdef IMLI_VALID logic `N(`SC_IMLI_WIDTH) imli; +`endif // logic `N(`RAS_CTR_SIZE) ras_ctr; } RedirectInfo; @@ -160,7 +155,9 @@ typedef struct packed { // logic `N(`GHIST_SIZE) ghist; // logic `N(`PHIST_SIZE) phist; logic `N(`SC_GHIST_WIDTH) sc_ghist; +`ifdef IMLI_VALID logic `N(`SC_IMLI_WIDTH) imli; +`endif TageFoldHistory tage_history; } BranchHistory; diff --git a/src/defines/global.svh b/src/defines/global.svh index a75b24e..1b48569 100644 --- a/src/defines/global.svh +++ b/src/defines/global.svh @@ -89,11 +89,12 @@ `define SC_HIST_NUM 4 `define SC_SET_WIDTH 9 `define SC_COMMIT_SIZE 4 -`define SC_HIST_DEPTH 64'h0100_0100_0100_0100 -`define SC_HIST_LENGTH 64'h000f_0008_0004_0000 +`define SC_HIST_DEPTH 64'h0200_0200_0200_0200 +`define SC_HIST_LENGTH 64'h000f_000b_0008_0004 // better than 000f_0008_0004_0000 `define SC_HIST_THRESH_DEPTH 64'h0020_0020_0020_0020 `define SC_HIST_INIT 6'b100000 `define SC_IMLI_NUM 0 +// `define IMLI_VALID `define SC_IMLI_DEPTH 16'h0200 `define SC_IMLI_THRESH_DEPTH 16'h0020 `define SC_IMLI_INIT 6'b100000 From 9b2e8dc35f302842315dfd9c7e32747e90c5a4a3 Mon Sep 17 00:00:00 2001 From: xinhecuican Date: Fri, 31 Jan 2025 14:22:24 +0800 Subject: [PATCH 2/3] impl l2 cache --- Makefile | 2 +- src/core/AxiInterface.sv | 340 ------ src/core/CPUCore.sv | 38 +- src/core/backend/Backend.sv | 2 +- src/core/backend/lsu/LSU.sv | 6 +- src/core/backend/lsu/dcache/DCacheMiss.sv | 51 +- src/core/backend/lsu/dcache/DCacheWay.sv | 84 +- 
src/core/backend/lsu/dcache/ReplaceQueue.sv | 138 +-- src/core/backend/lsu/dcache/dcache.sv | 176 ++- src/core/mem/cache/Directory.sv | 204 ++++ src/core/mem/cache/L2Cache.sv | 101 ++ src/core/mem/cache/L2CacheData.sv | 139 +++ src/core/mem/cache/L2CacheWrapper.sv | 151 +++ src/core/mem/cache/L2MSHR.sv | 1161 +++++++++++++++++++ src/core/{ => mem}/mmu/DTLB.sv | 2 +- src/core/{ => mem}/mmu/ITLB.sv | 2 +- src/core/{ => mem}/mmu/L2TLB.sv | 2 +- src/core/{ => mem}/mmu/PMPCheck.sv | 2 +- src/core/{ => mem}/mmu/PTW.sv | 2 +- src/core/{ => mem}/mmu/TLB.sv | 2 +- src/core/{ => mem}/mmu/TLBCache.sv | 2 +- src/core/{ => mem}/mmu/TLBRepeater.sv | 2 +- src/defines/arch.svh | 1 + src/defines/bundles.svh | 18 +- src/defines/bus/ace.svh | 82 +- src/defines/bus/mem.svh | 29 +- src/defines/global.svh | 19 +- src/defines/interfaces.svh | 95 ++ src/sim/SimTop.sv | 16 +- src/soc/Soc.sv | 14 +- src/utils/axi/axi_convert/ace_ccu_top.sv | 241 ---- src/utils/utils.sv | 48 +- 32 files changed, 2278 insertions(+), 894 deletions(-) delete mode 100644 src/core/AxiInterface.sv create mode 100644 src/core/mem/cache/Directory.sv create mode 100644 src/core/mem/cache/L2Cache.sv create mode 100644 src/core/mem/cache/L2CacheData.sv create mode 100644 src/core/mem/cache/L2CacheWrapper.sv create mode 100644 src/core/mem/cache/L2MSHR.sv rename src/core/{ => mem}/mmu/DTLB.sv (99%) rename src/core/{ => mem}/mmu/ITLB.sv (99%) rename src/core/{ => mem}/mmu/L2TLB.sv (98%) rename src/core/{ => mem}/mmu/PMPCheck.sv (98%) rename src/core/{ => mem}/mmu/PTW.sv (99%) rename src/core/{ => mem}/mmu/TLB.sv (99%) rename src/core/{ => mem}/mmu/TLBCache.sv (99%) rename src/core/{ => mem}/mmu/TLBRepeater.sv (95%) delete mode 100644 src/utils/axi/axi_convert/ace_ccu_top.sv diff --git a/Makefile b/Makefile index 4916622..55de446 100644 --- a/Makefile +++ b/Makefile @@ -73,7 +73,7 @@ emu: emu-run: emu mkdir -p $(LOG_PATH) - build/emu -i "${I}" -s 1168 -b ${S} -e ${E} -B $(WB) -E $(WE) ${TRACE_ARGS} --log-path=${LOG_PATH} 
+ riscv64-unknown-linux-gnu-gdb --args build/emu -i "${I}" -s 1168 -b ${S} -e ${E} -B $(WB) -E $(WE) ${TRACE_ARGS} --log-path=${LOG_PATH} sbi: make -C utils/opensbi ARCH=riscv CROSS_COMPILE=riscv64-unknown-linux-gnu- PLATFORM_RISCV_XLEN=32 PLATFORM=generic FW_PAYLOAD_PATH=${CURDIR}/utils/rv-linux/arch/riscv/boot/Image FW_FDT_PATH=${CURDIR}/utils/opensbi/dts/custom.dtb FW_PAYLOAD_OFFSET=0x400000 diff --git a/src/core/AxiInterface.sv b/src/core/AxiInterface.sv deleted file mode 100644 index 770d676..0000000 --- a/src/core/AxiInterface.sv +++ /dev/null @@ -1,340 +0,0 @@ -`include "../defines/defines.svh" - -module AxiInterface( - input logic clk, - input logic rst, - CacheBus.slaver icache_io, - CacheBus.slaver tlb_io, - CacheBus.slave dcache_io, - CacheBus.slave ducache_io, - AxiIO.master axi, - NativeSnoopIO.slave dcache_snoop_io -); - - typedef logic [`PADDR_SIZE-1: 0] addr_t; - typedef logic [`CORE_WIDTH-1: 0] id_t; - typedef logic user_t; - typedef logic [`CORE_WIDTH+2-1: 0] mst_id_t; - typedef logic [`XLEN-1: 0] data_t; - typedef logic [`XLEN/8-1: 0] strb_t; - `CACHE_TYPEDEF_AW_CHAN_T(AxiAW, addr_t, id_t, user_t) - `CACHE_TYPEDEF_W_CHAN_T(AxiW, data_t, strb_t, user_t) - `CACHE_TYPEDEF_B_CHAN_T(AxiB, id_t, user_t) - `CACHE_TYPEDEF_AR_CHAN_T(AxiAR, addr_t, id_t, user_t) - `CACHE_TYPEDEF_R_CHAN_T(AxiR, data_t, id_t, user_t) - `CACHE_TYPEDEF_REQ_T(AxiReq, AxiAW, AxiW, AxiAR) - `CACHE_TYPEDEF_RESP_T(AxiResp, AxiB, AxiR) - - `CACHE_TYPEDEF_AW_CHAN_T(AxiMAW, addr_t, mst_id_t, user_t) - `CACHE_TYPEDEF_B_CHAN_T(AxiMB, mst_id_t, user_t) - `CACHE_TYPEDEF_AR_CHAN_T(AxiMAR, addr_t, mst_id_t, user_t) - `CACHE_TYPEDEF_R_CHAN_T(AxiMR, data_t, mst_id_t, user_t) - `CACHE_TYPEDEF_REQ_T(AxiMReq, AxiMAW, AxiW, AxiMAR) - `CACHE_TYPEDEF_RESP_T(AxiMResp, AxiMB, AxiMR) - AxiReq ireq, dreq, du_req, tlb_req; - AxiResp iresp, dresp, du_resp, tlb_resp; - AxiMReq req_o, co_req_o; - AxiMResp resp_i, co_resp_i; - - `CACHE_ASSIGN_TO_AR(ireq.ar, icache_io) - assign ireq.ar_valid = 
icache_io.ar_valid; - assign ireq.r_ready = icache_io.r_ready; - assign ireq.aw = 0; - assign ireq.w = 0; - assign ireq.aw_valid = 0; - assign ireq.w_valid = 0; - assign ireq.b_ready = 0; - `CACHE_ASSIGN_FROM_R(icache_io, iresp.r) - assign icache_io.ar_ready = iresp.ar_ready; - assign icache_io.r_valid = iresp.r_valid; - - `CACHE_ASSIGN_TO_AR(tlb_req.ar, tlb_io) - assign tlb_req.ar_valid = tlb_io.ar_valid; - assign tlb_req.r_ready = icache_io.r_ready; - assign tlb_req.aw = 0; - assign tlb_req.w = 0; - assign tlb_req.aw_valid = 0; - assign tlb_req.w_valid = 0; - assign tlb_req.b_ready = 0; - `CACHE_ASSIGN_FROM_R(tlb_io, tlb_resp.r) - assign tlb_io.ar_ready = tlb_resp.ar_ready; - assign tlb_io.r_valid = tlb_resp.r_valid; - - `CACHE_ASSIGN_TO_REQ(dreq, dcache_io) - `CACHE_ASSIGN_FROM_RESP(dcache_io, dresp) - - `CACHE_ASSIGN_TO_REQ(du_req, ducache_io) - `CACHE_ASSIGN_FROM_RESP(ducache_io, du_resp) - - `__CACHE_TO_W(assign, axi.w, _, co_req_o.w, .) - assign axi.ar_valid = co_req_o.ar_valid; - assign axi.w_valid = co_req_o.w_valid; - assign axi.b_ready = co_req_o.b_ready; - assign axi.r_ready = co_req_o.r_ready; - assign axi.aw_valid = co_req_o.aw_valid; - assign axi.ar_id = co_req_o.ar.id; - assign axi.ar_addr = co_req_o.ar.addr; - assign axi.ar_len = co_req_o.ar.len; - assign axi.ar_size = co_req_o.ar.size; - assign axi.ar_burst = co_req_o.ar.burst; - assign axi.ar_user = co_req_o.ar.user; - assign axi.aw_id = co_req_o.aw.id; - assign axi.aw_addr = co_req_o.aw.addr; - assign axi.aw_len = co_req_o.aw.len; - assign axi.aw_size = co_req_o.aw.size; - assign axi.aw_burst = co_req_o.aw.burst; - assign axi.aw_user = co_req_o.aw.user; - assign axi.aw_cache = 0; - assign axi.aw_prot = 0; - assign axi.aw_qos = 0; - assign axi.aw_region = 0; - assign axi.ar_cache = 0; - assign axi.ar_prot = 0; - assign axi.ar_qos = 0; - assign axi.ar_region = 0; - `__CACHE_TO_B(assign, co_resp_i.b, ., axi.b, _) - `__CACHE_TO_R(assign, co_resp_i.r, ., axi.r, _) - assign co_resp_i.aw_ready = 
axi.aw_ready; - assign co_resp_i.ar_ready = axi.ar_ready; - assign co_resp_i.w_ready = axi.w_ready; - assign co_resp_i.b_valid = axi.b_valid; - assign co_resp_i.r_valid = axi.r_valid; - - axi_mux #( - .SlvAxiIDWidth(`CORE_WIDTH), - .slv_aw_chan_t(AxiAW), - .mst_aw_chan_t(AxiMAW), - .w_chan_t(AxiW), - .slv_b_chan_t(AxiB), - .mst_b_chan_t(AxiMB), - .slv_ar_chan_t(AxiAR), - .mst_ar_chan_t(AxiMAR), - .slv_r_chan_t(AxiR), - .mst_r_chan_t(AxiMR), - .slv_req_t(AxiReq), - .slv_resp_t(AxiResp), - .mst_req_t(AxiMReq), - .mst_resp_t(AxiMResp), - .MaxWTrans(1), // 只有DCache有写 - .NoSlvPorts(4) - ) axi_mux_inst( - .clk_i(clk), - .rst_ni(~rst), - .test_i(1'b0), - .slv_reqs_i({dreq, du_req, tlb_req, ireq}), - .slv_resps_o({dresp, du_resp, tlb_resp, iresp}), - .mst_req_o(req_o), - .mst_resp_i(resp_i) - ); - - logic `N(`DCACHE_WAY_WIDTH) wway; - always_ff @(posedge clk)begin - wway <= dcache_io.ar_user; - end - - DCacheCoherence #( - .aw_chan_t(AxiMAW), - .w_chan_t(AxiW), - .b_chan_t(AxiMB), - .ar_chan_t(AxiMAR), - .r_chan_t(AxiMR), - .axi_req_t(AxiMReq), - .axi_resp_t(AxiMResp) - ) dcache_coherence ( - .clk, - .rst(rst), - .wway_i(wway), - .slv_req_i(req_o), - .slv_resp_o(resp_i), - .mst_req_o(co_req_o), - .mst_resp_i(co_resp_i), - .dcache_snoop_io(dcache_snoop_io) - ); - -endmodule - -module DCacheCoherence #( - // AXI channel structs - parameter type aw_chan_t = logic, - parameter type w_chan_t = logic, - parameter type b_chan_t = logic, - parameter type ar_chan_t = logic, - parameter type r_chan_t = logic, - // AXI request & response structs - parameter type axi_req_t = logic, - parameter type axi_resp_t = logic -)( - input logic clk, - input logic rst, - input logic `N(`DCACHE_WAY_WIDTH) wway_i, - input axi_req_t slv_req_i, - output axi_resp_t slv_resp_o, - - output axi_req_t mst_req_o, - input axi_resp_t mst_resp_i, - NativeSnoopIO.slave dcache_snoop_io -); - logic `N(`DCACHE_SET_WIDTH) widx, widx_n; - logic `N(`DCACHE_TAG) wtag; - logic `ARRAY(`DCACHE_WAY, `DCACHE_TAG+1) 
w_tagv, tagv; - logic w_valid; - logic `N(`DCACHE_WAY_WIDTH) w_way_idx; - logic `N(`DCACHE_WAY) w_way; - logic `N(`DCACHE_WAY) tagv_hits; - logic `N(`PADDR_SIZE) ac_addr; - logic tagv_hit; - logic `N(`DCACHE_TAG) rtag; - logic r_dcache; - r_chan_t snoop_r; - - logic `N(`DCACHE_REPLACE_SIZE) replace_valid; - logic `N(`DCACHE_BLOCK_SIZE) replace_tag `N(`DCACHE_REPLACE_SIZE); - logic replace_en; - logic `N(`DCACHE_WAY_WIDTH) replace_way; - logic `N(`DCACHE_REPLACE_WIDTH) replace_free_idx; - logic `N(`PADDR_SIZE) replace_clear_addr; - logic `N(`DCACHE_REPLACE_SIZE) replace_hits, replace_clear_hits; - logic `N(`DCACHE_REPLACE_WIDTH) replace_idx, replace_clear_idx; - logic replace_hit, replace_clear_en; - logic aw_writeEvict, ar_makeUnique; - - axi_req_t slv_req_i_s2; - axi_resp_t slv_resp_o_s2; - - assign w_valid = (slv_resp_o.r.last & slv_resp_o.r_valid & slv_req_i.r_ready & - (slv_resp_o.r.id[`CORE_WIDTH+1: `CORE_WIDTH] == 2'b11)) | - ar_makeUnique; - Decoder #(`DCACHE_WAY) decoder_way (w_way_idx, w_way); - MPRAM #( - .WIDTH(`DCACHE_WAY * (`DCACHE_TAG+1)), - .DEPTH(`DCACHE_SET), - .READ_PORT(1), - .WRITE_PORT(0), - .RW_PORT(1), - .READ_LATENCY(1), - .RESET(1), - .BYTE_WRITE(1), - .BYTES(`DCACHE_WAY) - ) tagv_ram ( - .clk, - .rst, - .rst_sync(0), - .en({w_valid, slv_req_i.ar_valid}), - .we(w_way & {`DCACHE_WAY{w_valid}}), - .raddr(slv_req_i.ar.addr`DCACHE_SET_BUS), - .rdata({w_tagv, tagv}), - .waddr(widx), - .wdata({`DCACHE_WAY{wtag, 1'b1}}), - .ready() - ); - - assign tagv_hit = |tagv_hits; -generate - for(genvar i=0; i<`DCACHE_REPLACE_SIZE; i++)begin - assign replace_hits[i] = replace_valid[i] & (slv_req_i.ar.addr`DCACHE_BLOCK_BUS == replace_tag[i]); - assign replace_clear_hits[i] = replace_valid[i] & (replace_clear_addr`DCACHE_BLOCK_BUS == replace_tag[i]); - end - for(genvar i=0; i<`DCACHE_WAY; i++)begin - assign tagv_hits[i] = tagv[i][0] & (rtag == tagv[i][`DCACHE_TAG: 1]); - end -endgenerate - PEncoder #(`DCACHE_REPLACE_SIZE) encoder_replace_free_idx 
(~replace_valid, replace_free_idx); - Encoder #(`DCACHE_REPLACE_SIZE) encoder_replace_idx (replace_clear_hits, replace_idx); - always_ff @(posedge clk)begin - replace_en <= w_valid; - replace_way <= w_way_idx; - replace_hit <= |replace_hits; - rtag <= slv_req_i.ar.addr`DCACHE_TAG_BUS; - ac_addr <= slv_req_i.ar.addr; - r_dcache <= slv_req_i.ar.id[`CORE_WIDTH+1: `CORE_WIDTH] == 2'b11; - aw_writeEvict <= slv_req_i.aw_valid & (slv_req_i.aw.snoop == `ACEOP_WRITE_EVICT) & - (slv_req_i.aw.id[`CORE_WIDTH+1: `CORE_WIDTH] == 2'b11); - ar_makeUnique <= slv_req_i.ar_valid & (slv_req_i.ar.snoop == `ACEOP_MAKE_UNIQUE) & - (slv_req_i.ar.id[`CORE_WIDTH+1: `CORE_WIDTH] == 2'b11); - if(slv_req_i.ar_valid & (slv_req_i.ar.id[`CORE_WIDTH+1: `CORE_WIDTH] == 2'b11))begin - wtag <= slv_req_i.ar.addr`DCACHE_TAG_BUS; - widx <= slv_req_i.ar.addr`DCACHE_SET_BUS; - w_way_idx <= wway_i; - end - widx_n <= widx; - end - always_ff @(posedge clk, posedge rst)begin - if(rst == `RST)begin - replace_valid <= 0; - replace_tag <= '{default: 0}; - replace_clear_en <= 1'b0; - replace_clear_idx <= 0; - replace_clear_addr <= 0; - end - else begin - if(replace_en)begin - replace_valid[replace_free_idx] <= 1'b1; - replace_tag[replace_free_idx] <= {w_tagv[replace_way][`DCACHE_TAG: 1], widx_n}; - end - if(slv_req_i.aw_valid & slv_resp_o.aw_ready & slv_req_i.aw.user)begin - replace_clear_en <= 1'b1; - end - if(slv_req_i.aw_valid)begin - replace_clear_addr <= slv_req_i.aw.addr; - end - if(replace_clear_en & ~slv_req_i.ar_valid)begin - replace_clear_en <= 1'b0; - replace_clear_idx <= replace_idx; - end - if(slv_resp_o.b_valid & slv_req_i.b_ready & - (slv_resp_o.b.id[`CORE_WIDTH+1: `CORE_WIDTH] == 2'b11))begin - replace_valid[replace_clear_idx] <= 1'b0; - end - if(aw_writeEvict)begin - replace_valid[replace_idx] <= 1'b0; - end - end - end - - axi_cut #( - .aw_chan_t(aw_chan_t), - .w_chan_t(w_chan_t), - .b_chan_t(b_chan_t), - .ar_chan_t(ar_chan_t), - .r_chan_t(r_chan_t), - .axi_req_t(axi_req_t), - 
.axi_resp_t(axi_resp_t) - ) axi_cut_inst ( - .clk_i(clk), - .rst_ni(~rst), - .slv_req_i(slv_req_i), - .slv_resp_o(slv_resp_o), - .mst_req_o(slv_req_i_s2), - .mst_resp_i(slv_resp_o_s2) - ); - - assign mst_req_o.aw = slv_req_i_s2.aw; - assign mst_req_o.aw_valid = ~aw_writeEvict & slv_req_i_s2.aw_valid; - assign mst_req_o.w = slv_req_i_s2.w; - assign mst_req_o.w_valid = slv_req_i_s2.w_valid; - assign mst_req_o.b_ready = slv_req_i_s2.b_ready; - assign mst_req_o.ar = slv_req_i_s2.ar; - assign mst_req_o.ar_valid = (~replace_hit & ~tagv_hit | r_dcache) & ~ar_makeUnique & slv_req_i_s2.ar_valid; - assign mst_req_o.r_ready = slv_req_i_s2.r_ready; - - assign slv_resp_o_s2.aw_ready = mst_resp_i.aw_ready | aw_writeEvict; - assign slv_resp_o_s2.ar_ready = ar_makeUnique ? 1'b1 : - (~replace_hit & ~tagv_hit | r_dcache) ? mst_resp_i.ar_ready : - dcache_snoop_io.ac_ready; - assign slv_resp_o_s2.w_ready = mst_resp_i.w_ready; - assign slv_resp_o_s2.b_valid = mst_resp_i.b_valid; - assign slv_resp_o_s2.b = mst_resp_i.b; - assign slv_resp_o_s2.r_valid = dcache_snoop_io.cd_valid | mst_resp_i.r_valid; - assign slv_resp_o_s2.r = mst_resp_i.r_valid ? 
mst_resp_i.r : snoop_r; - - assign dcache_snoop_io.ac_addr = ac_addr; - assign dcache_snoop_io.ac_valid = (replace_hit | tagv_hit) & ~r_dcache & slv_req_i_s2.ar_valid; - assign dcache_snoop_io.ac_user = slv_req_i_s2.ar.id; - assign dcache_snoop_io.cd_ready = ~mst_resp_i.r_valid; - assign snoop_r.id = dcache_snoop_io.cd_user; - assign snoop_r.data = dcache_snoop_io.cd_data; - assign snoop_r.resp = 0; - assign snoop_r.last = dcache_snoop_io.cd_last; - assign snoop_r.user = 0; - -endmodule \ No newline at end of file diff --git a/src/core/CPUCore.sv b/src/core/CPUCore.sv index a22eacb..b485ca4 100644 --- a/src/core/CPUCore.sv +++ b/src/core/CPUCore.sv @@ -9,20 +9,42 @@ module CPUCore ( ); CacheBus #( - `PADDR_SIZE, `XLEN, `CORE_WIDTH, 1 + `PADDR_SIZE, `XLEN, 1, 1 ) icache_io(); CacheBus #( - `PADDR_SIZE, `XLEN, `CORE_WIDTH, `DCACHE_WAY_WIDTH + `PADDR_SIZE, `XLEN, 1, `DCACHE_WAY_WIDTH ) dcache_io(); CacheBus #( - `PADDR_SIZE, `XLEN, `CORE_WIDTH, 1 + `PADDR_SIZE, `XLEN, 1, 1 ) ducache_io(); CacheBus #( - `PADDR_SIZE, `XLEN, `CORE_WIDTH, 1 + `PADDR_SIZE, `XLEN, 1, 1 ) tlb_io(); - NativeSnoopIO #( - `PADDR_SIZE, `XLEN, `CORE_WIDTH+2 + CacheBus #( + `PADDR_SIZE, `XLEN, 2, 1 + ) master_io(); + SnoopIO #( + `PADDR_SIZE, `XLEN, `L2MSHR_WIDTH ) dcache_snoop_io(); + typedef logic [`PADDR_SIZE-1: 0] addr_t; + typedef logic [`XLEN-1: 0] data_t; + typedef logic [$clog2(`L2MSHR_SIZE)-1: 0] snoop_ack_id_t; + `SNOOP_TYPEDEF_AC_CHAN_T(snoop_ac_chan_t, addr_t) + `SNOOP_TYPEDEF_CD_CHAN_T(snoop_cd_chan_t, data_t) + `SNOOP_TYPEDEF_CR_CHAN_T(snoop_cr_chan_t) + `SNOOP_TYPEDEF_REQ_T(snoop_req_t, snoop_ac_chan_t, snoop_ack_id_t) + `SNOOP_TYPEDEF_RESP_T(snoop_resp_t, snoop_cd_chan_t, snoop_cr_chan_t, snoop_ack_id_t) + `SNOOP_TYPEDEF_REQ_T(mst_snoop_req_t, snoop_ac_chan_t, snoop_ack_id_t) + `SNOOP_TYPEDEF_RESP_T(mst_snoop_resp_t, snoop_cd_chan_t, snoop_cr_chan_t, snoop_ack_id_t) + snoop_req_t snoop_req; + snoop_resp_t snoop_resp; + mst_snoop_req_t mst_snoop_req; + mst_snoop_resp_t 
mst_snoop_resp; + `SNOOP_ASSIGN_FROM_REQ(dcache_snoop_io, snoop_req) + `SNOOP_ASSIGN_TO_RESP(snoop_resp, dcache_snoop_io) + `CACHE_ASSIGN_TO_AXI(axi, master_io) + assign mst_snoop_req = 0; + IfuBackendIO ifu_backend_io(); FsqBackendIO fsq_back_io(); CommitBus commitBus(); @@ -43,7 +65,9 @@ module CPUCore ( .commitBus_out(commitBus), .axi_io(dcache_io), .fenceBus_o(fenceBus)); - AxiInterface axi_interface(.*); + L2CacheWrapper #( + snoop_req_t, snoop_resp_t, mst_snoop_req_t, mst_snoop_resp_t + ) l2cache_wrapper (.*, .master_io(master_io.master)); L2TLB l2_tlb(.*, .csr_io(csr_l2_io), .flush(fsq_back_io.redirect.en), diff --git a/src/core/backend/Backend.sv b/src/core/backend/Backend.sv index ce1ba68..e4bf2fb 100644 --- a/src/core/backend/Backend.sv +++ b/src/core/backend/Backend.sv @@ -11,7 +11,7 @@ module Backend( CsrTlbIO.csr csr_itlb_io, CsrL2IO.csr csr_l2_io, TlbL2IO.tlb dtlb_io, - NativeSnoopIO.master dcache_snoop_io, + SnoopIO.master dcache_snoop_io, ClintIO.cpu clint_io, FenceBus.backend fenceBus_o ); diff --git a/src/core/backend/lsu/LSU.sv b/src/core/backend/lsu/LSU.sv index 378da8e..9041258 100644 --- a/src/core/backend/lsu/LSU.sv +++ b/src/core/backend/lsu/LSU.sv @@ -49,7 +49,7 @@ module LSU( input BackendCtrl backendCtrl, CacheBus.master axi_io, CacheBus.master ducache_io, - NativeSnoopIO.master snoop_io, + SnoopIO.master snoop_io, BackendRedirectIO.mem redirect_io, CsrTlbIO.tlb csr_ltlb_io, CsrTlbIO.tlb csr_stlb_io, @@ -74,12 +74,12 @@ module LSU( LoadQueueIO load_queue_io(); CacheBus #( - `PADDR_SIZE, `XLEN, `CORE_WIDTH, 1 + `PADDR_SIZE, `XLEN, 1, 1 ) laxi_io(); StoreUnitIO store_io(); StoreQueueIO store_queue_io(); CacheBus #( - `PADDR_SIZE, `XLEN, `CORE_WIDTH, 1 + `PADDR_SIZE, `XLEN, 1, 1 ) saxi_io(); StoreCommitIO store_commit_io(); ViolationIO violation_io(); diff --git a/src/core/backend/lsu/dcache/DCacheMiss.sv b/src/core/backend/lsu/dcache/DCacheMiss.sv index 99607ff..cb69044 100644 --- a/src/core/backend/lsu/dcache/DCacheMiss.sv +++ 
b/src/core/backend/lsu/dcache/DCacheMiss.sv @@ -9,6 +9,7 @@ interface DCacheMissIO; logic `N(`LOAD_PIPELINE) rfull; logic wen; + logic wdata_valid; logic `N(`PADDR_SIZE) waddr; logic `N(`PADDR_SIZE) waddr_pre; logic `N(`STORE_COMMIT_WIDTH) scIdx; @@ -33,19 +34,19 @@ interface DCacheMissIO; logic `N(`PADDR_SIZE) refillAddr; logic `ARRAY(`DCACHE_BANK, `DCACHE_BITS) refillData; logic `N(`STORE_COMMIT_WIDTH) refill_scIdx; + DirectoryState refill_state; logic `N(`LOAD_REFILL_SIZE) lq_en; logic `ARRAY(`LOAD_REFILL_SIZE, `DCACHE_BITS) lqData; logic `ARRAY(`LOAD_REFILL_SIZE, `LOAD_QUEUE_WIDTH) lqIdx_o; modport miss (input ren, raddr, lqIdx, robIdx, req_success, replaceWay, refill_valid, - wen, waddr, scIdx, wdata, wmask, raddr_pre, waddr_pre, + wen, waddr, scIdx, wdata, wmask, raddr_pre, waddr_pre, wdata_valid, `ifdef RVA input amo_en, output amo_refill, `endif - output rfull, req, req_addr, wfull, - refill_en, refill_dirty, refillWay, refillAddr, refillData, refill_scIdx, - lq_en, lqData, lqIdx_o); + output rfull, req, req_addr, wfull, lq_en, lqData, lqIdx_o, + refill_en, refill_dirty, refillWay, refillAddr, refillData, refill_scIdx, refill_state); endinterface module DCacheMiss( @@ -89,6 +90,16 @@ module DCacheMiss( logic refill_eq; logic w_invalid; + logic req_cache; + logic `N(`PADDR_SIZE) cache_addr; + logic `N($clog2(`DCACHE_LINE / `DATA_BYTE)) cacheIdx; + logic `ARRAY(`DCACHE_LINE / `DATA_BYTE, `XLEN) cacheData, req_data; + logic `ARRAY(`DCACHE_LINE / `DATA_BYTE, `DATA_BYTE) req_mask; + logic `N(`XLEN) expandMask; + logic `N(`XLEN) combine_cache_data; + logic `N(`DCACHE_WAY_WIDTH) req_way; + DirectoryState req_state; + //load enqueue // 有三种情况 // 1. 
没有命中, 需要mshr_en和en有空闲 @@ -206,7 +217,7 @@ endgenerate // refill assign io.refill_en = en[head] & dataValid[head]; - assign io.refill_dirty = |mask[head]; + assign io.refill_state = req_state; assign io.refillWay = way[head]; assign io.refillAddr = {addr[head], {`DCACHE_BANK_WIDTH{1'b0}}, 2'b0}; assign io.refillData = data[head]; @@ -336,7 +347,7 @@ endgenerate end if(io.wen & ~w_invalid & (write_remain_valid | whit_combine))begin scIdxs[widx] <= io.scIdx; - data_valid_all[widx] <= &wmask_all; + data_valid_all[widx] <= &wmask_all | io.wdata_valid; wvalid[widx] <= 1'b1; end if(rlast)begin @@ -363,9 +374,11 @@ endgenerate `ifdef RVA logic `N(`DCACHE_MSHR_SIZE) amo; + logic amo_req; always_ff @(posedge clk, posedge rst)begin if(rst == `RST)begin amo <= 0; + amo_req <= 0; end else begin if(io.amo_en & ~w_invalid & (write_remain_valid | whit_combine))begin @@ -374,6 +387,9 @@ endgenerate if(io.refill_en & io.refill_valid)begin amo[head] <= 1'b0; end + if(req_next & io.req_success)begin + amo_req <= amo[head]; + end end end @@ -384,15 +400,6 @@ endgenerate `endif // req - logic req_cache; - logic `N(`PADDR_SIZE) cache_addr; - logic `N($clog2(`DCACHE_LINE / `DATA_BYTE)) cacheIdx; - logic `ARRAY(`DCACHE_LINE / `DATA_BYTE, `XLEN) cacheData, req_data; - logic `ARRAY(`DCACHE_LINE / `DATA_BYTE, `DATA_BYTE) req_mask; - logic `N(`XLEN) expandMask; - logic `N(`XLEN) combine_cache_data; - logic `N(`DCACHE_WAY_WIDTH) req_way; - assign io.req = en[head] & ~req_start; assign io.req_addr = {addr[head], {`DCACHE_BANK_WIDTH{1'b0}}, 2'b0}; assign rlast = r_axi_io.r_valid & r_axi_io.r_last; @@ -418,6 +425,7 @@ endgenerate req_way <= 0; req_valid_all <= 0; req_wvalid <= 0; + req_state <= 0; end else begin if(io.req)begin @@ -458,6 +466,11 @@ endgenerate if(rlast)begin req_data <= data[head]; req_mask <= mask[head]; + req_state <= req_wvalid | req_valid_all +`ifdef RVA + | amo_req +`endif + ? 
3'b111 : r_axi_io.r_resp[4: 2]; end end @@ -467,8 +480,12 @@ endgenerate assign r_axi_io.ar_len = `DCACHE_LINE / `DATA_BYTE - 1; assign r_axi_io.ar_size = $clog2(`DATA_BYTE); assign r_axi_io.ar_burst = 2'b01; - assign r_axi_io.ar_user = req_way; - assign r_axi_io.ar_snoop = req_valid_all ? `ACEOP_MAKE_UNIQUE : + assign r_axi_io.ar_user = 0; + assign r_axi_io.ar_snoop = +`ifdef RVA + amo_req ? `ACEOP_READ_UNIQUE : +`endif + req_valid_all ? `ACEOP_MAKE_UNIQUE : req_wvalid ? `ACEOP_READ_UNIQUE : `ACEOP_READ_SHARED; assign r_axi_io.r_ready = 1'b1; diff --git a/src/core/backend/lsu/dcache/DCacheWay.sv b/src/core/backend/lsu/dcache/DCacheWay.sv index 481581e..03979e1 100644 --- a/src/core/backend/lsu/dcache/DCacheWay.sv +++ b/src/core/backend/lsu/dcache/DCacheWay.sv @@ -4,56 +4,90 @@ module DCacheData( input logic clk, input logic rst, input logic `N(`LOAD_PIPELINE+1) tagv_en, - input logic `N(`DCACHE_WAY) tagv_we, + input logic `N(`DCACHE_WAY) tag_we, + input logic `N(`DCACHE_WAY) valid_we, input logic `ARRAY(`LOAD_PIPELINE+1, `DCACHE_SET_WIDTH) tagv_index, + input logic `ARRAY(`LOAD_PIPELINE+1, `DCACHE_SET_WIDTH) tagv_windex, input logic `N(`DCACHE_TAG+1) tagv_wdata, output logic `TENSOR(`LOAD_PIPELINE+1, `DCACHE_WAY, `DCACHE_TAG+1) tagv, + output DCacheMeta `N(`DCACHE_WAY) meta, + input DCacheMeta wmeta, input logic `N(`DCACHE_BANK) en, input logic `ARRAY(`DCACHE_BANK, `DCACHE_WAY * `DCACHE_BYTE) we, input logic `ARRAY(`DCACHE_BANK, `DCACHE_SET_WIDTH) index, input logic `ARRAY(`DCACHE_BANK, `DCACHE_BITS) wdata, - output logic `TENSOR(`DCACHE_BANK, `DCACHE_WAY, `DCACHE_BITS) data, - input logic dirty_en, - input logic `N(`DCACHE_SET_WIDTH) dirty_index, - output logic `N(`DCACHE_WAY) dirty, - input logic `N(`DCACHE_WAY) dirty_we, - input logic `N(`DCACHE_SET_WIDTH) dirty_windex, - input logic `N(`DCACHE_WAY) dirty_wdata + output logic `TENSOR(`DCACHE_BANK, `DCACHE_WAY, `DCACHE_BITS) data ); + logic `TENSOR(`LOAD_PIPELINE+1, `DCACHE_WAY, `DCACHE_TAG) tag; + logic 
`ARRAY(`LOAD_PIPELINE+1, `DCACHE_WAY) valid; generate for(genvar i=0; i<`LOAD_PIPELINE+1; i++)begin + for(genvar j=0; j<`DCACHE_WAY; j++)begin + assign tagv[i][j][`DCACHE_TAG: 1] = tag[i][j]; + assign tagv[i][j][0] = valid[i][j]; + end SPRAM #( - .WIDTH(`DCACHE_WAY * (`DCACHE_TAG+1)), + .WIDTH(`DCACHE_WAY * `DCACHE_TAG), .DEPTH(`DCACHE_SET), .RESET(1), .BYTE_WRITE(1), .READ_LATENCY(1), .BYTES(`DCACHE_WAY) - ) tagv_ram ( + ) tag_ram ( .clk(clk), .rst(rst), .rst_sync(0), .en(tagv_en[i]), .addr(tagv_index[i]), - .rdata(tagv[i]), - .we(tagv_we), - .wdata({`DCACHE_WAY{tagv_wdata}}), + .rdata(tag[i]), + .we(tag_we), + .wdata({`DCACHE_WAY{tagv_wdata[`DCACHE_TAG: 1]}}), .ready() ); end -endgenerate - - logic `N(`DCACHE_WAY) dirty_ram `N(`DCACHE_SET); - always_ff @(posedge clk)begin - if(dirty_en)begin - dirty <= dirty_ram[dirty_index]; - end - for(int i=0; i<`DCACHE_WAY; i++)begin - if(dirty_we[i])begin - dirty_ram[dirty_windex][i] <= dirty_wdata[i]; - end - end + for(genvar i=0; i<`LOAD_PIPELINE; i++)begin + MPRAM #( + .WIDTH(`DCACHE_WAY), + .DEPTH(`DCACHE_SET), + .READ_PORT(1), + .WRITE_PORT(1), + .RESET(1), + .BYTE_WRITE(1), + .BYTES(`DCACHE_WAY) + ) valid_ram ( + .clk, + .rst, + .rst_sync(0), + .en(tagv_en[i]), + .raddr(tagv_index[i]), + .rdata(valid[i]), + .we(valid_we[i]), + .waddr(tagv_windex[i]), + .wdata({`DCACHE_WAY{tagv_wdata[0]}}), + .ready() + ); end + MPRAM #( + .WIDTH(`DCACHE_WAY * $bits(DCacheMeta)), + .DEPTH(`DCACHE_SET), + .READ_PORT(1), + .WRITE_PORT(1), + .RESET(1), + .BYTE_WRITE(1), + .BYTES(`DCACHE_WAY) + ) meta_ram ( + .clk, + .rst, + .rst_sync(0), + .en(tagv_en[`LOAD_PIPELINE]), + .raddr(tagv_index[`LOAD_PIPELINE]), + .rdata(meta), + .we(valid_we[`LOAD_PIPELINE]), + .waddr(tagv_windex[`LOAD_PIPELINE]), + .wdata({`DCACHE_WAY{wmeta}}), + .ready() + ); +endgenerate generate for(genvar i=0; i<`DCACHE_BANK; i++)begin diff --git a/src/core/backend/lsu/dcache/ReplaceQueue.sv b/src/core/backend/lsu/dcache/ReplaceQueue.sv index 14b0cd2..e17f3ce 100644 --- 
a/src/core/backend/lsu/dcache/ReplaceQueue.sv +++ b/src/core/backend/lsu/dcache/ReplaceQueue.sv @@ -3,7 +3,7 @@ interface ReplaceQueueIO; logic en; logic refill_en; - logic refill_dirty; + DirectoryState refill_state; logic `N(`DCACHE_TAG+`DCACHE_SET_WIDTH) addr; logic `ARRAY(`DCACHE_BANK, `DCACHE_BITS) data; logic `N(`DCACHE_REPLACE_WIDTH) idx; @@ -15,12 +15,13 @@ interface ReplaceQueueIO; logic `N(`DCACHE_REPLACE_WIDTH) replace_idx; logic snoop_en; - logic snoop_ready; + logic snoop_clean; logic snoop_hit; + DirectoryState snoop_state; logic `N(`PADDR_SIZE) snoop_addr; logic `ARRAY(`DCACHE_BANK, `DCACHE_BITS) snoop_data; - modport queue (input en, refill_en, refill_dirty, addr, data, replace_idx, waddr, snoop_data, snoop_ready, output idx, whit, full, snoop_en, snoop_addr); + modport queue (input en, refill_en, refill_state, addr, data, replace_idx, waddr, snoop_en, snoop_clean, snoop_addr, output idx, whit, full, snoop_data, snoop_hit, snoop_state); modport miss (input full, idx, output replace_idx); endinterface @@ -28,8 +29,7 @@ module ReplaceQueue( input logic clk, input logic rst, ReplaceQueueIO.queue io, - CacheBus.masterw w_axi_io, - NativeSnoopIO.master snoop_io + CacheBus.masterw w_axi_io ); localparam TRANSFER_BANK = `DCACHE_LINE / `DATA_BYTE; typedef struct packed { @@ -40,7 +40,8 @@ module ReplaceQueue( ReplaceState replace_state; ReplaceEntry entrys `N(`DCACHE_REPLACE_SIZE); - logic `N(`DCACHE_REPLACE_SIZE) en, dataValid, dirty, prior; + logic `N(`DCACHE_REPLACE_SIZE) en, dataValid, prior; + DirectoryState `N(`DCACHE_REPLACE_SIZE) state; logic `N(`DCACHE_REPLACE_SIZE) valid, prior_valid; logic `N(`DCACHE_REPLACE_WIDTH) freeIdx, validIdx, priorIdx, processIdx, processIdx_pre; logic full; @@ -64,7 +65,7 @@ module ReplaceQueue( dataValid <= 0; prior <= 0; en <= 0; - dirty <= 0; + state <= 0; end else begin if(io.en & ~(|hit) & ~(&en))begin @@ -74,9 +75,13 @@ module ReplaceQueue( prior[freeIdx] <= 1'b1; end + if(io.snoop_en & io.snoop_clean & 
(|snoop_hit))begin + en[snoop_hit_idx] <= 1'b0; + end + if(io.refill_en)begin dataValid[io.replace_idx] <= 1'b1; - dirty[io.replace_idx] <= io.refill_dirty; + state[io.replace_idx] <= io.refill_state; end if(retire_last)begin @@ -130,7 +135,7 @@ endgenerate IDLE: begin if(|valid)begin aw_valid <= 1'b1; - aw_dirty <= dirty[processIdx_pre]; + aw_dirty <= state[processIdx_pre].dirty; replace_state <= ADDRESS; processEntry <= entrys[processIdx_pre]; processIdx <= processIdx_pre; @@ -184,11 +189,11 @@ endgenerate assign w_axi_io.aw_valid = aw_valid; assign w_axi_io.aw_id = 0; assign w_axi_io.aw_addr = {processEntry.addr, {`DCACHE_LINE_WIDTH{1'b0}}}; - assign w_axi_io.aw_len = ~aw_dirty ? 0 : `DCACHE_LINE / `DATA_BYTE - 1; + assign w_axi_io.aw_len = `DCACHE_LINE / `DATA_BYTE - 1; assign w_axi_io.aw_size = $clog2(`DATA_BYTE); assign w_axi_io.aw_burst = 2'b01; - assign w_axi_io.aw_user = aw_dirty; - assign w_axi_io.aw_snoop = aw_dirty ? `ACEOP_WRITE_BACK : `ACEOP_WRITE_EVICT; + assign w_axi_io.aw_user = 0; + assign w_axi_io.aw_snoop = aw_dirty ? 
`ACEOP_WRITE_CLEAN : `ACEOP_WRITE_EVICT; assign w_axi_io.w_data = processEntry.data[widx]; assign w_axi_io.w_strb = {`DATA_BYTE{1'b1}}; @@ -199,103 +204,30 @@ endgenerate assign w_axi_io.b_ready = 1'b1; // snoop - typedef struct packed { - logic `N(`DCACHE_SNOOP_ID_WIDTH) id; - logic `ARRAY(`DCACHE_BANK, `DCACHE_BITS) data; - } SnoopEntry; - logic `N(`DCACHE_SNOOP_SIZE) snoop_en, snoop_data_valid, snoop_valid, snoop_issue; - logic `N(`PADDR_SIZE) snoop_addr `N(`DCACHE_SNOOP_SIZE); - SnoopEntry `N(`DCACHE_SNOOP_SIZE) snoopEntrys; - logic snoop_en_s2, snoop_en_s3; - logic snoop_replace_hit; - logic `ARRAY(`DCACHE_BANK, `DCACHE_BITS) snoop_replace_data; - logic `N(`DCACHE_SNOOP_WIDTH) snoop_valid_idx, snoop_process_idx, snoop_free_idx, snoop_busy_idx; - logic `N(`DCACHE_SNOOP_WIDTH) snoop_busy_idx_s2, snoop_busy_idx_s3; - logic snoop_process; - logic cd_valid, cd_last; - logic `N(`DCACHE_SNOOP_ID_WIDTH) cd_user; + logic `N(`DCACHE_REPLACE_SIZE) snoop_hit; + logic `N(`DCACHE_REPLACE_WIDTH) snoop_hit_idx; + logic `TENSOR(`DCACHE_REPLACE_SIZE, `DCACHE_BANK, `DCACHE_BITS) replace_data; logic `ARRAY(`DCACHE_BANK, `DCACHE_BITS) snoop_data; - logic `N($clog2(TRANSFER_BANK)) snoopIdx; - - assign snoop_valid = snoop_en & snoop_data_valid; - PEncoder #(`DCACHE_SNOOP_SIZE) encoder_snoop_idx (snoop_valid, snoop_valid_idx); - PEncoder #(`DCACHE_SNOOP_SIZE) encoder_snoop_free_idx (~snoop_en, snoop_free_idx); - PEncoder #(`DCACHE_SNOOP_SIZE) encoder_snoop_busy_idx (snoop_en & ~snoop_issue, snoop_busy_idx); - assign io.snoop_en = |(snoop_en & ~snoop_issue); - assign io.snoop_addr = snoop_addr[snoop_busy_idx]; + DirectoryState snoop_state; - always_ff @(posedge clk)begin - snoop_en_s2 <= io.snoop_en & io.snoop_ready; - snoop_en_s3 <= snoop_en_s2; - snoop_busy_idx_s2 <= snoop_busy_idx; - snoop_busy_idx_s3 <= snoop_busy_idx_s2; - snoop_replace_hit <= io.whit; - snoop_replace_data <= entrys[io.idx].data; - end - always_ff @(posedge clk, posedge rst)begin - if(rst == `RST)begin - snoop_en 
<= 0; - snoop_data_valid <= 0; - snoop_issue <= 0; - end - else begin - if(snoop_io.ac_valid & snoop_io.ac_ready)begin - snoop_en[snoop_free_idx] <= 1'b1; - snoopEntrys[snoop_free_idx].id <= snoop_io.ac_user; - snoop_issue[snoop_free_idx] <= 1'b0; - snoop_addr[snoop_free_idx] <= snoop_io.ac_addr; - end - if(snoop_en_s3)begin - snoop_data_valid[snoop_busy_idx_s3] <= 1'b1; - snoopEntrys[snoop_busy_idx_s3].data <= snoop_replace_hit ? snoop_replace_data : io.snoop_data; - end - if(io.snoop_en & io.snoop_ready)begin - snoop_issue[snoop_busy_idx] <= 1'b1; - end - if(cd_last)begin - snoop_en[snoop_process_idx] <= 1'b0; - snoop_data_valid[snoop_process_idx] <= 1'b0; - end - end +generate + for(genvar i=0; i<`DCACHE_REPLACE_SIZE; i++)begin + assign snoop_hit[i] = en[i] & dataValid[i] & (entrys[i].addr == io.snoop_addr`DCACHE_BLOCK_BUS); + assign replace_data[i] = entrys[i].data; end - always_ff @(posedge clk, posedge rst)begin - if(rst == `RST)begin - cd_valid <= 1'b0; - cd_last <= 1'b0; - snoopIdx <= 0; - cd_user <= 0; - snoop_process <= 1'b0; - snoop_data <= 0; - snoop_process_idx <= 0; - end - else begin - if(!snoop_process && (|snoop_valid))begin - snoop_data <= snoopEntrys[snoop_valid_idx].data; - snoop_process_idx <= snoop_valid_idx; - cd_valid <= 1'b1; - cd_user <= snoopEntrys[snoop_valid_idx].id; - end - if(snoop_io.cd_valid & snoop_io.cd_ready)begin - snoopIdx <= snoopIdx + 1; - if(snoopIdx == TRANSFER_BANK - 2)begin - cd_last <= 1'b1; - end - else begin - cd_last <= 1'b0; - end - if(cd_last)begin - snoop_process <= 1'b0; - cd_valid <= 1'b0; - end - end +endgenerate + Encoder #(`DCACHE_REPLACE_SIZE) encoder_snoop (snoop_hit, snoop_hit_idx); + OldestSelect #(`DCACHE_REPLACE_SIZE, 1, `DCACHE_BANK * `DCACHE_BITS) select_snoop_data (snoop_hit, replace_data, , snoop_data); + OldestSelect #(`DCACHE_REPLACE_SIZE, 1, $bits(DirectoryState)) select_snoop_state (snoop_hit, state, , snoop_state); + always_ff @(posedge clk)begin + if (io.snoop_en)begin + io.snoop_hit <= 
|snoop_hit; + io.snoop_data <= snoop_data; + io.snoop_state <= snoop_state; end end - assign snoop_io.ac_ready = ~(&snoop_en); - assign snoop_io.cd_valid = cd_valid; - assign snoop_io.cd_last = cd_last; - assign snoop_io.cd_data = snoop_data[snoopIdx]; - assign snoop_io.cd_user = cd_user; + `ifdef DIFFTEST `LOG_ARRAY(T_DCACHE, dbg_data, newEntry.data, TRANSFER_BANK) diff --git a/src/core/backend/lsu/dcache/dcache.sv b/src/core/backend/lsu/dcache/dcache.sv index 6a0a35f..104d471 100644 --- a/src/core/backend/lsu/dcache/dcache.sv +++ b/src/core/backend/lsu/dcache/dcache.sv @@ -9,12 +9,13 @@ module DCache( DCacheAmoIO.dcache amo_io, `endif CacheBus.master axi_io, - NativeSnoopIO.master snoop_io, + SnoopIO.master snoop_io, input BackendCtrl backendCtrl ); logic `ARRAY(`LOAD_PIPELINE, `DCACHE_SET_WIDTH) loadIdx; logic `ARRAY(`LOAD_PIPELINE+1, `DCACHE_SET_WIDTH) tagvIdx; + logic `ARRAY(`LOAD_PIPELINE+1, `DCACHE_SET_WIDTH) tagvWIdx; logic `ARRAY(`LOAD_PIPELINE, `DCACHE_LINE_WIDTH-2) loadOffset; logic `ARRAY(`LOAD_PIPELINE, `DCACHE_BANK) loadBankDecode; logic `N(`DCACHE_BANK) loadBank; @@ -37,6 +38,11 @@ module DCache( logic whit; logic `N(`DCACHE_WAY_WIDTH) w_wayIdx, missWay_encode; logic `N(`LOAD_PIPELINE) write_valid; + logic write_invalid; + logic snoop_invalid; // CLEAN_INVALID, READ_UNIQUE + logic snoop_share; // READ_SHARED + logic `N(`DCACHE_WAY) w_state_errors; + DCacheMeta select_meta; logic refill_en_n; logic `N(`DCACHE_WAY) refill_way; @@ -44,8 +50,14 @@ module DCache( logic `N(`PADDR_SIZE) refill_addr_n; logic `N(`DCACHE_BLOCK_SIZE) replace_addr; - logic snoop_req; - logic `N(`PADDR_SIZE) snoop_addr; + logic ac_ready, cr_valid, cd_valid, cd_last; + DirectoryState cr_state; + logic snoop_req, snoop_req_n; + logic `N(`PADDR_SIZE) snoop_addr, snoop_addr_n; + logic snoop_hit, snoop_cache_hit, snoop_replace_hit; + logic snoop_share_n, snoop_invalid_n; + logic `ARRAY(`DCACHE_BANK, `DCACHE_BITS) snoop_replace_data, snoop_data; + logic `N(`DCACHE_BANK_WIDTH) 
snoop_data_idx; logic `N(`DCACHE_WAY) way_dirty; `ifdef RVA @@ -69,7 +81,7 @@ module DCache( DCacheMissIO miss_io(); ReplaceQueueIO replace_queue_io(); CacheBus #( - `PADDR_SIZE, `XLEN, `CORE_WIDTH, `DCACHE_WAY_WIDTH + `PADDR_SIZE, `XLEN, 1, 1 ) dcache_axi_io(); ReplaceIO #( .DEPTH(`DCACHE_SET), @@ -102,8 +114,10 @@ endgenerate ParallelOR #(.WIDTH(`DCACHE_BANK), .DEPTH(`LOAD_PIPELINE)) or_bank(loadBankDecode, loadBank); logic `N(`LOAD_PIPELINE+1) tagv_en; - logic `N(`DCACHE_WAY) tagv_we; + logic `N(`DCACHE_WAY) tag_we, valid_we; logic `TENSOR(`LOAD_PIPELINE+1, `DCACHE_WAY, `DCACHE_TAG+1) tagv; + DCacheMeta `N(`DCACHE_WAY) meta; + DCacheMeta wmeta; logic `TENSOR(`DCACHE_BANK, `DCACHE_WAY, `DCACHE_BYTE) data_we; logic `ARRAY(`DCACHE_BANK, `DCACHE_SET_WIDTH) data_index; logic `N(`DCACHE_SET_WIDTH) refillIdx; @@ -112,22 +126,30 @@ endgenerate logic `N(`DCACHE_WAY) dirty_we, dirty_wdata; generate - + // TODO: 每个way使用单独的index,refill和snoop不同way时可以同时写入 for(genvar i=0; i<`LOAD_PIPELINE; i++)begin assign tagv_en[i] = rio.req[i]; - assign tagvIdx[i] = miss_io.refill_en & miss_io.refill_valid ? miss_io.refillAddr`DCACHE_SET_BUS : - loadIdx[i]; + assign tagvIdx[i] = loadIdx[i]; + assign tagvWIdx[i] = snoop_invalid & (|w_wayhit) ? widx : miss_io.refillAddr`DCACHE_SET_BUS; end assign tagv_en[`LOAD_PIPELINE] = wreq | miss_io.refill_en; assign tagvIdx[`LOAD_PIPELINE] = wreq ? widx : miss_io.refillAddr`DCACHE_SET_BUS; - assign tagv_we = {`DCACHE_WAY{miss_io.refill_valid & miss_io.refill_en}} & refill_way; + assign tagvWIdx[`LOAD_PIPELINE] = snoop_invalid & (|w_wayhit) ? 
widx : miss_io.refillAddr`DCACHE_SET_BUS; + // TODO: 只有原来不是share才需要写入(snoop_share) + assign tag_we = {`DCACHE_WAY{miss_io.refill_valid & miss_io.refill_en}} & refill_way; + assign valid_we = {`DCACHE_WAY{miss_io.refill_valid & miss_io.refill_en}} & refill_way | + {`DCACHE_WAY{snoop_invalid | snoop_share}} & w_wayhit; + assign wmeta.v = miss_io.refill_valid & miss_io.refill_en & ~snoop_invalid | snoop_share; + assign wmeta.share = snoop_share | miss_io.refill_state.share; + assign wmeta.owned = snoop_share & whit ? select_meta.owned : miss_io.refill_state.owner; + assign wmeta.dirty = snoop_share & whit ? select_meta.dirty : miss_io.refill_state.dirty; for(genvar i=0; i<`DCACHE_BANK; i++)begin for(genvar j=0; j<`DCACHE_WAY; j++)begin assign data_we[i][j] = {`DCACHE_BYTE{cache_wreq & cache_wway[j]}} & wmask_n[i] | {`DCACHE_BYTE{miss_io.refill_valid & miss_io.refill_en & refill_way[j]}}; assign rdata[j][i] = data_rdata[i][j]; end - assign data_index[i] = snoop_req ? snoop_addr`DCACHE_SET_BUS : + assign data_index[i] = snoop_share & whit ? snoop_addr`DCACHE_SET_BUS : |data_we[i] ? refillIdx : `ifdef RVA amo_en[i] ? 
amo_io.paddr`DCACHE_SET_BUS : @@ -149,25 +171,23 @@ endgenerate .clk, .rst, .tagv_en, - .tagv_we, + .tag_we, + .valid_we, .tagv_index(tagvIdx), + .tagv_windex(tagvWIdx), .tagv_wdata({miss_io.refillAddr`DCACHE_TAG_BUS, 1'b1}), .tagv, + .meta, + .wmeta, `ifdef RVA - .en((loadBank | amo_en | {`DCACHE_BANK{miss_io.refill_en | snoop_req}})), + .en((loadBank | amo_en | {`DCACHE_BANK{miss_io.refill_en | snoop_req_n}})), `else - .en((loadBank | {`DCACHE_BANK{miss_io.refill_en | snoop_req}})), + .en((loadBank | {`DCACHE_BANK{miss_io.refill_en | snoop_req_n}})), `endif .we(data_we), .index(data_index), .wdata(refillData), - .data(data_rdata), - .dirty_en(miss_io.refill_en), - .dirty_index(miss_io.refillAddr`DCACHE_SET_BUS), - .dirty(way_dirty), - .dirty_we, - .dirty_windex(refillIdx), - .dirty_wdata + .data(data_rdata) ); generate for(genvar i=0; i<`LOAD_PIPELINE; i++)begin @@ -185,9 +205,9 @@ generate logic `N(`DCACHE_BYTE) load_wmask; assign load_wmask = wmask_n[loadOffset[i]]; - assign write_valid[i] = miss_io.refill_en & ~(cache_wreq & (|w_wayhit)) & ~wio.req | - snoop_req | - cache_wreq & (|w_wayhit) & (|load_wmask) + assign write_valid[i] = miss_io.refill_en & ~(cache_wreq & whit) & ~wio.req | + snoop_req_n & whit | + cache_wreq & whit & (|load_wmask) `ifdef RVA | amo_en[loadOffset[i]] `endif @@ -250,14 +270,13 @@ endgenerate assign rio.lqIdx_o = miss_io.lqIdx_o; // write - logic write_invalid; `ifdef RVA - assign wreq = (wio.req | replace_queue_io.snoop_en | amo_io.req) & ~amo_req; - assign waddr = replace_queue_io.snoop_en ? replace_queue_io.snoop_addr : + assign wreq = (wio.req | snoop_req | amo_io.req) & ~amo_req; + assign waddr = snoop_req ? replace_queue_io.snoop_addr : wio.req ? wio.paddr : amo_io.paddr; `else - assign wreq = wio.req | replace_queue_io.snoop_en; - assign waddr = replace_queue_io.snoop_en ? replace_queue_io.snoop_addr : wio.paddr; + assign wreq = wio.req | snoop_req; + assign waddr = snoop_req ? 
replace_queue_io.snoop_addr : wio.paddr; `endif assign wdata = wio.data; assign wmask = wio.mask; @@ -265,20 +284,21 @@ endgenerate assign replace_io.miss_index = miss_io.req_addr`DCACHE_SET_BUS; `ifdef RVA - assign wio.valid = ~replace_queue_io.snoop_en & ~amo_req; + assign wio.valid = ~snoop_req & ~amo_req; `else - assign wio.valid = ~replace_queue_io.snoop_en; + assign wio.valid = ~snoop_req; `endif generate for(genvar i=0; i<`DCACHE_WAY; i++)begin assign wtagv[i] = tagv[`LOAD_PIPELINE][i]; assign w_wayhit[i] = wtagv[i][0] & (wtagv[i][`DCACHE_TAG: 1] == waddr_n`DCACHE_TAG_BUS); + assign w_state_errors[i] = meta[i].share | ~meta[i].owned; end endgenerate logic `N(`STORE_COMMIT_WIDTH) scIdx_n; always_ff @(posedge clk)begin - wreq_n <= wio.req & ~replace_queue_io.snoop_en & ~amo_req; + wreq_n <= wio.req & ~snoop_req & ~amo_req; miss_req_n <= miss_io.req; waddr_n <= waddr; `ifdef RVA @@ -294,6 +314,10 @@ endgenerate end `endif scIdx_n <= wio.scIdx; + snoop_invalid <= snoop_io.ac_valid & snoop_io.ac_ready & + ((snoop_io.ac_snoop == `ACEOP_READ_UNIQUE) | + (snoop_io.ac_snoop == `ACEOP_CLEAN_INVALID)); + snoop_share <= snoop_io.ac_valid & snoop_io.ac_ready & (snoop_io.ac_snoop == `ACEOP_READ_SHARED); // write_invalid <= miss_req_n && (waddr_n`DCACHE_BLOCK_BUS == waddr`DCACHE_BLOCK_BUS); end `ifdef RVA @@ -308,9 +332,10 @@ endgenerate assign replace_queue_io.waddr = miss_io.req_addr; // write miss - assign whit = (|w_wayhit); + assign whit = (|(w_wayhit & ~w_state_errors)); Encoder #(`DCACHE_WAY) encoder_way (w_wayhit, w_wayIdx); Encoder #(`DCACHE_WAY) encoder_miss_way (replace_io.miss_way, missWay_encode); + OldestSelect #(`DCACHE_WAY, 1, $bits(DCacheMeta)) selector_meta (w_wayhit, meta, , select_meta); assign miss_io.wen = ~whit & wreq_n; assign miss_io.waddr = waddr_n; assign miss_io.waddr_pre = waddr; @@ -332,7 +357,7 @@ endgenerate Decoder #(`DCACHE_WAY) decoder_refill_way (miss_io.refillWay, refill_way); // refill - assign miss_io.refill_valid = ~(cache_wreq & 
(|cache_wway)) & ~wio.req & ~snoop_req & ~replace_queue_io.snoop_en + assign miss_io.refill_valid = ~(cache_wreq & whit) & ~wio.req & ~snoop_req & ~snoop_req_n `ifdef RVA & ~amo_io.req `endif @@ -350,29 +375,88 @@ endgenerate refill_addr_n <= miss_io.refillAddr; end assign replace_queue_io.refill_en = refill_en_n; - assign replace_queue_io.refill_dirty = wtagv[refill_way_n][0] & way_dirty[refill_way_n]; + assign replace_queue_io.refill_state = meta[refill_way_n].v ? meta[refill_way_n][2: 0] : 0; assign replace_queue_io.addr = replace_addr; assign replace_queue_io.data = rdata[refill_way_n]; - // BUG: rlast但是没有refill进dcache时snoop访问不到该项 - assign replace_queue_io.snoop_data = rdata[refill_way_n]; -`ifdef RVA - assign replace_queue_io.snoop_ready = ~amo_req; -`else - assign replace_queue_io.snoop_ready = 1'b1; -`endif +// snoop always_ff @(posedge clk)begin - snoop_req <= replace_queue_io.snoop_en; - snoop_addr <= replace_queue_io.snoop_addr; + snoop_addr <= snoop_io.ac_addr; + snoop_addr_n <= snoop_addr; + snoop_req_n <= snoop_io.ac_valid & snoop_io.ac_ready; + snoop_replace_data <= replace_queue_io.snoop_data; + snoop_cache_hit <= snoop_req_n & whit; + snoop_replace_hit <= snoop_req_n & replace_queue_io.snoop_hit; + snoop_hit <= snoop_cache_hit | snoop_replace_hit; + snoop_share_n <= snoop_share; end + always_ff @(posedge clk, posedge rst)begin + if (rst == `RST)begin + ac_ready <= 1'b1; + cr_valid <= 1'b0; + cd_valid <= 1'b0; + cd_last <= 1'b0; + cr_state <= 0; + snoop_data <= 0; + snoop_data_idx <= 0; + snoop_invalid_n <= 0; + end + else begin + if(snoop_io.ac_valid & ac_ready)begin + ac_ready <= 1'b0; + end + if(snoop_invalid_n & snoop_io.cr_valid & snoop_io.cr_ready | + snoop_io.cd_valid & snoop_io.cd_ready & snoop_io.cd_last)begin + ac_ready <= 1'b1; + end + + if(snoop_req_n)begin + cr_valid <= 1'b1; + cr_state <= whit ? 
select_meta[2: 0] : replace_queue_io.snoop_state; + snoop_invalid_n <= snoop_invalid; + end + if(cr_valid & snoop_io.cr_ready)begin + cr_valid <= 1'b0; + end + + if((snoop_cache_hit | snoop_replace_hit) & snoop_share_n)begin + cd_valid <= 1'b1; + snoop_data <= snoop_cache_hit ? rdata[w_wayIdx] : snoop_replace_data; + end + if(cd_valid & snoop_io.cd_ready & snoop_io.cd_last)begin + cd_valid <= 1'b0; + end - // amo + if(cd_valid & snoop_io.cd_ready)begin + snoop_data_idx <= snoop_data_idx + 1; + if(snoop_data_idx == `DCACHE_BANK - 2)begin + cd_last <= 1'b1; + end + else begin + cd_last <= 1'b0; + end + end + end + end + assign snoop_req = snoop_io.ac_valid & snoop_io.ac_ready; + assign replace_queue_io.snoop_en = snoop_req; + assign replace_queue_io.snoop_clean = snoop_io.ac_snoop == `ACEOP_CLEAN_INVALID; + assign replace_queue_io.snoop_addr = snoop_io.ac_addr; + assign snoop_io.ac_ready = ac_ready; + assign snoop_io.cr_valid = cr_valid; + assign snoop_io.cr_resp = {cr_state, 2'b00}; + assign snoop_io.cd_valid = cd_valid; + assign snoop_io.cd_data = snoop_data[snoop_data_idx]; + assign snoop_io.cd_last = cd_last; + + +// amo `ifdef RVA // TODO: currently only one core, not implement lr/sc Decoder #(`DCACHE_BANK) decoder_amo_bank(amo_io.paddr`DCACHE_BANK_BUS, amo_bank); assign amo_en = {`DCACHE_BANK{amo_io.req}} & amo_bank; always_ff @(posedge clk)begin - amo_req <= amo_io.req & ~wio.req & ~replace_queue_io.snoop_en; + amo_req <= amo_io.req & ~wio.req & ~snoop_req; amo_addr <= amo_io.paddr; amo_mask <= amo_io.mask; amo_data <= amo_io.data; @@ -384,7 +468,7 @@ endgenerate end assign amo_rdata = rdata[w_wayIdx][amo_addr`DCACHE_BANK_BUS]; - assign amo_io.ready = ~wio.req & ~replace_queue_io.snoop_en; + assign amo_io.ready = ~wio.req & ~snoop_req; assign amo_io.success = amo_req & whit; assign amo_io.rdata = issc ? 
!sc_match : amo_rdata; assign miss_io.amo_en = amo_req & ~whit; diff --git a/src/core/mem/cache/Directory.sv b/src/core/mem/cache/Directory.sv new file mode 100644 index 0000000..b005c7b --- /dev/null +++ b/src/core/mem/cache/Directory.sv @@ -0,0 +1,204 @@ +`include "../../../defines/defines.svh" + +// 目录写入时机: +// 在读目录后实际上便可以进行目录更新,因为同时mshr不会更新同一个set +// slave读: +// 如果读slave hit,则更新local为shared, +// 如果l2 hit, 如果是L2 Cache且来自dcache,则更新local并且删除l2 +// 如果来自icache则不改变目录 +// 如果slave miss且l2 miss, 则更新l2和local(根据dcache) +// slave写: +// 如果是ReadUnique,并且命中l2(此时以l2中的状态为准,忽略dcache提供的SU信息)且当前状态为S, +// 那么需要转发给L3,请求变更为CleanUnique。如果是L2并且当前状态为U,则省略转发的步骤 +// 无论是否命中l2,都需要更新local,并且删除l2 Directory(因为只有dcache会发ReadUnique) +// 如果是MakeUnique, 省略实际读数据的过程,但是仍然需要按照ReadUnique的过程转发请求和更新目录 +// slave替换: +// 如果是l2并且命中那么写入l2 Directory,并且Shared状态根据l2,Dirty状态根据dcache +// 如果miss,那么正常写入替换路即可 +// 如果是l3,正常写入Directory +// 所有替换都需要删除local,因为只有owner才会发出替换请求 + +// 写入local时机: +// 当读取时,local命中,状态变为shared,directory命中,状态根据directory的状态 +// 都没命中,根据master传过来的状态 +// 如果local没有命中,则需要进行替换,根据替换路的信息决定是否要发送snoop,并且如果是owner还需要进行替换 +// 当替换时,删除local +module LocalDirectory #( + parameter SLAVE_NUM = 1, + parameter WAY = 4, + parameter SET = 64, + parameter OFFSET = 32, + parameter LLC = 1, + parameter OFFSET_WIDTH = $clog2(OFFSET), + parameter SET_WIDTH = $clog2(SET), + parameter TAG_WIDTH = `PADDR_SIZE - OFFSET_WIDTH - SET_WIDTH, + parameter SLAVE_WIDTH = idxWidth(SLAVE_NUM) +)( + input logic clk, + input logic rst, + L2MSHRSlaveIO.slaver mshr_slave_io +); + + typedef struct packed { + logic owned; // not llc + logic `N(SLAVE_WIDTH) owner; + logic `N(SLAVE_NUM) valid; + logic share; + logic `N(TAG_WIDTH) tag; + } Entry; + + parameter ENTRY_SIZE = TAG_WIDTH + 1 + (LLC ? 
0 : 1) + SLAVE_NUM + SLAVE_WIDTH; + logic `ARRAY(WAY, ENTRY_SIZE) lookup_entry; + logic `N(ENTRY_SIZE) select_entry, replace_data; + logic select_hit; + logic `N(WAY) tag_hits, wway_dec; + logic en_n; + logic `N(`PADDR_SIZE) raddr_n; + + always_ff @(posedge clk)begin + raddr_n <= mshr_slave_io.raddr; + en_n <= mshr_slave_io.request; + end + + + ReplaceIO #(.DEPTH(SET), .WAY_NUM(WAY)) replace_io(); + Replace #( + .DEPTH(SET), + .WAY_NUM(WAY) + ) replace (.*); + assign replace_io.miss_index = mshr_slave_io.raddr[OFFSET_WIDTH +: SET_WIDTH]; + assign replace_io.hit_en = select_hit & en_n | mshr_slave_io.we & ~mshr_slave_io.request; + assign replace_io.hit_way = mshr_slave_io.we & ~mshr_slave_io.request ? + (|mshr_slave_io.wdata[TAG_WIDTH+1 +: SLAVE_NUM] ? mshr_slave_io.wway : ~mshr_slave_io.wway) : tag_hits; + assign replace_io.hit_index = raddr_n[OFFSET_WIDTH +: SET_WIDTH]; + +generate + for(genvar i=0; i Date: Tue, 4 Feb 2025 11:00:46 +0800 Subject: [PATCH 3/3] fix(l2cache): fix some bugs - add snoop pipline, add w, b, r pipe - fix l2 refill buffer data select - fix l2 data write before replace - add CLEAN_UNIQUE when cache write and state is share and owned - fix slave dir write bugs --- Makefile | 2 +- src/core/CPUCore.sv | 6 +- src/core/backend/lsu/dcache/DCacheMiss.sv | 71 ++++-- src/core/backend/lsu/dcache/DCacheWay.sv | 21 +- src/core/backend/lsu/dcache/ReplaceQueue.sv | 61 +++-- src/core/backend/lsu/dcache/dcache.sv | 32 +-- src/core/frontend/icache/Icache.sv | 2 +- src/core/mem/cache/Directory.sv | 10 +- src/core/mem/cache/L2CacheWrapper.sv | 42 +++- src/core/mem/cache/L2MSHR.sv | 234 +++++++++++------- src/defines/bus/ace.svh | 9 +- src/defines/bus/mem.svh | 7 +- src/defines/debug.svh | 4 +- src/defines/interfaces.svh | 4 +- src/sim/SimTop.sv | 70 ++++-- src/soc/Soc.sv | 26 +- src/utils/axi/{axi_convert => }/LICENSE | 0 .../axi/{axi_convert => }/ace_trs_dec.sv | 0 .../axi/{axi_convert => }/addr_decode.sv | 0 .../axi/{axi_convert => }/addr_decode_dync.sv | 0 
.../axi/{axi_convert => }/axi_atop_filter.sv | 0 .../{axi_convert => }/axi_burst_splitter.sv | 0 src/utils/axi/{axi_convert => }/axi_cdc.sv | 0 .../axi/{axi_convert => }/axi_cdc_dst.sv | 0 .../axi/{axi_convert => }/axi_cdc_src.sv | 0 src/utils/axi/{axi_convert => }/axi_cut.sv | 0 src/utils/axi/{axi_convert => }/axi_demux.sv | 0 .../axi/{axi_convert => }/axi_demux_simple.sv | 0 .../axi/{axi_convert => }/axi_err_slv.sv | 0 .../axi/{axi_convert => }/axi_id_prepend.sv | 0 .../axi/{axi_convert => }/axi_lite_to_apb.sv | 0 .../axi/{axi_convert => }/axi_multicut.sv | 0 src/utils/axi/{axi_convert => }/axi_mux.sv | 0 src/utils/axi/{axi_convert => }/axi_to_apb.sv | 0 .../axi/{axi_convert => }/axi_to_axi_lite.sv | 0 src/utils/axi/{axi_convert => }/axi_xbar.sv | 0 .../axi/{axi_convert => }/axi_xbar_unmuxed.sv | 0 .../axi/{axi_convert => }/binary_to_gray.sv | 0 src/utils/axi/{axi_convert => }/ccu_fsm.sv | 0 .../axi/{axi_convert => }/cdc_fifo_gray.sv | 0 src/utils/axi/{axi_convert => }/counter.sv | 0 .../axi/{axi_convert => }/delta_counter.sv | 0 .../fall_through_register.sv | 0 src/utils/axi/{axi_convert => }/fifo_v3.sv | 0 .../axi/{axi_convert => }/gray_to_binary.sv | 0 src/utils/axi/{axi_convert => }/id_queue.sv | 0 .../axi/{axi_convert => }/onehot_to_bin.sv | 0 .../axi/{axi_convert => }/rr_arb_tree.sv | 0 src/utils/axi/snoop_cut.sv | 144 +++++++++++ .../axi/{axi_convert => }/spill_register.sv | 0 .../spill_register_flushable.sv | 0 .../axi/{axi_convert => }/stream_register.sv | 0 src/utils/axi/{axi_convert => }/sync.sv | 0 src/utils/utils.sv | 23 +- 54 files changed, 546 insertions(+), 222 deletions(-) rename src/utils/axi/{axi_convert => }/LICENSE (100%) rename src/utils/axi/{axi_convert => }/ace_trs_dec.sv (100%) rename src/utils/axi/{axi_convert => }/addr_decode.sv (100%) rename src/utils/axi/{axi_convert => }/addr_decode_dync.sv (100%) rename src/utils/axi/{axi_convert => }/axi_atop_filter.sv (100%) rename src/utils/axi/{axi_convert => }/axi_burst_splitter.sv (100%) 
rename src/utils/axi/{axi_convert => }/axi_cdc.sv (100%) rename src/utils/axi/{axi_convert => }/axi_cdc_dst.sv (100%) rename src/utils/axi/{axi_convert => }/axi_cdc_src.sv (100%) rename src/utils/axi/{axi_convert => }/axi_cut.sv (100%) rename src/utils/axi/{axi_convert => }/axi_demux.sv (100%) rename src/utils/axi/{axi_convert => }/axi_demux_simple.sv (100%) rename src/utils/axi/{axi_convert => }/axi_err_slv.sv (100%) rename src/utils/axi/{axi_convert => }/axi_id_prepend.sv (100%) rename src/utils/axi/{axi_convert => }/axi_lite_to_apb.sv (100%) rename src/utils/axi/{axi_convert => }/axi_multicut.sv (100%) rename src/utils/axi/{axi_convert => }/axi_mux.sv (100%) rename src/utils/axi/{axi_convert => }/axi_to_apb.sv (100%) rename src/utils/axi/{axi_convert => }/axi_to_axi_lite.sv (100%) rename src/utils/axi/{axi_convert => }/axi_xbar.sv (100%) rename src/utils/axi/{axi_convert => }/axi_xbar_unmuxed.sv (100%) rename src/utils/axi/{axi_convert => }/binary_to_gray.sv (100%) rename src/utils/axi/{axi_convert => }/ccu_fsm.sv (100%) rename src/utils/axi/{axi_convert => }/cdc_fifo_gray.sv (100%) rename src/utils/axi/{axi_convert => }/counter.sv (100%) rename src/utils/axi/{axi_convert => }/delta_counter.sv (100%) rename src/utils/axi/{axi_convert => }/fall_through_register.sv (100%) rename src/utils/axi/{axi_convert => }/fifo_v3.sv (100%) rename src/utils/axi/{axi_convert => }/gray_to_binary.sv (100%) rename src/utils/axi/{axi_convert => }/id_queue.sv (100%) rename src/utils/axi/{axi_convert => }/onehot_to_bin.sv (100%) rename src/utils/axi/{axi_convert => }/rr_arb_tree.sv (100%) create mode 100644 src/utils/axi/snoop_cut.sv rename src/utils/axi/{axi_convert => }/spill_register.sv (100%) rename src/utils/axi/{axi_convert => }/spill_register_flushable.sv (100%) rename src/utils/axi/{axi_convert => }/stream_register.sv (100%) rename src/utils/axi/{axi_convert => }/sync.sv (100%) diff --git a/Makefile b/Makefile index 55de446..4916622 100644 --- a/Makefile +++ b/Makefile @@ 
-73,7 +73,7 @@ emu: emu-run: emu mkdir -p $(LOG_PATH) - riscv64-unknown-linux-gnu-gdb --args build/emu -i "${I}" -s 1168 -b ${S} -e ${E} -B $(WB) -E $(WE) ${TRACE_ARGS} --log-path=${LOG_PATH} + build/emu -i "${I}" -s 1168 -b ${S} -e ${E} -B $(WB) -E $(WE) ${TRACE_ARGS} --log-path=${LOG_PATH} sbi: make -C utils/opensbi ARCH=riscv CROSS_COMPILE=riscv64-unknown-linux-gnu- PLATFORM_RISCV_XLEN=32 PLATFORM=generic FW_PAYLOAD_PATH=${CURDIR}/utils/rv-linux/arch/riscv/boot/Image FW_FDT_PATH=${CURDIR}/utils/opensbi/dts/custom.dtb FW_PAYLOAD_OFFSET=0x400000 diff --git a/src/core/CPUCore.sv b/src/core/CPUCore.sv index b485ca4..648b881 100644 --- a/src/core/CPUCore.sv +++ b/src/core/CPUCore.sv @@ -4,7 +4,8 @@ module CPUCore ( input logic clk, input logic rst, - AxiIO.master axi, + AxiIO.master mem_axi, + AxiIO.master peri_axi, ClintIO.cpu clint_io ); @@ -42,7 +43,8 @@ module CPUCore ( mst_snoop_resp_t mst_snoop_resp; `SNOOP_ASSIGN_FROM_REQ(dcache_snoop_io, snoop_req) `SNOOP_ASSIGN_TO_RESP(snoop_resp, dcache_snoop_io) - `CACHE_ASSIGN_TO_AXI(axi, master_io) + `CACHE_ASSIGN_TO_AXI(mem_axi, master_io) + `CACHE_ASSIGN_TO_AXI(peri_axi, ducache_io) assign mst_snoop_req = 0; IfuBackendIO ifu_backend_io(); diff --git a/src/core/backend/lsu/dcache/DCacheMiss.sv b/src/core/backend/lsu/dcache/DCacheMiss.sv index cb69044..c9a7c61 100644 --- a/src/core/backend/lsu/dcache/DCacheMiss.sv +++ b/src/core/backend/lsu/dcache/DCacheMiss.sv @@ -16,6 +16,9 @@ interface DCacheMissIO; logic `ARRAY(`DCACHE_BANK, `DCACHE_BITS) wdata; logic `ARRAY(`DCACHE_BANK, `DCACHE_BYTE) wmask; logic wfull; + logic wowned; + logic replaceHit; + logic `N(`DCACHE_WAY_WIDTH) replaceHitWay; `ifdef RVA logic amo_en; @@ -26,13 +29,17 @@ interface DCacheMissIO; logic `N(`PADDR_SIZE) req_addr; logic req_success; logic `N(`DCACHE_WAY_WIDTH) replaceWay; + logic `N(`L2MSHR_WIDTH) l2_idx; logic refill_en; logic refill_valid; - logic refill_dirty; + logic refill_write; + logic refill_replace_hit; logic `N(`DCACHE_WAY_WIDTH) 
refillWay; logic `N(`PADDR_SIZE) refillAddr; + logic `ARRAY(`DCACHE_BANK, `DCACHE_BYTE) refillMask; logic `ARRAY(`DCACHE_BANK, `DCACHE_BITS) refillData; + logic `N(`L2MSHR_WIDTH) refill_l2idx; logic `N(`STORE_COMMIT_WIDTH) refill_scIdx; DirectoryState refill_state; @@ -40,13 +47,13 @@ interface DCacheMissIO; logic `ARRAY(`LOAD_REFILL_SIZE, `DCACHE_BITS) lqData; logic `ARRAY(`LOAD_REFILL_SIZE, `LOAD_QUEUE_WIDTH) lqIdx_o; - modport miss (input ren, raddr, lqIdx, robIdx, req_success, replaceWay, refill_valid, - wen, waddr, scIdx, wdata, wmask, raddr_pre, waddr_pre, wdata_valid, + modport miss (input ren, raddr, lqIdx, robIdx, req_success, replaceHit, replaceHitWay, replaceWay, refill_valid, l2_idx, + wen, waddr, scIdx, wdata, wmask, raddr_pre, waddr_pre, wdata_valid, wowned, `ifdef RVA input amo_en, output amo_refill, `endif - output rfull, req, req_addr, wfull, lq_en, lqData, lqIdx_o, - refill_en, refill_dirty, refillWay, refillAddr, refillData, refill_scIdx, refill_state); + output rfull, req, req_addr, wfull, lq_en, lqData, lqIdx_o, refill_l2idx, + refill_en, refill_write, refill_replace_hit, refillWay, refillAddr, refillMask, refillData, refill_scIdx, refill_state); endinterface module DCacheMiss( @@ -73,6 +80,9 @@ module DCacheMiss( logic `ARRAY(`DCACHE_BANK, `DCACHE_BYTE) mask `N(`DCACHE_MISS_SIZE); logic `N(`DCACHE_MISS_SIZE) data_valid_all, wvalid; logic `N(`STORE_COMMIT_WIDTH) scIdxs `N(`DCACHE_MISS_SIZE); + logic `ARRAY(`DCACHE_MISS_SIZE, `L2MSHR_WIDTH) l2_idxs; + logic `N(`DCACHE_MISS_SIZE) replaceHit, wowned; + logic `ARRAY(`DCACHE_MISS_SIZE, `DCACHE_WAY_WIDTH) replaceHitWay; logic `N(`DCACHE_MISS_WIDTH) mshr_head, head, tail; logic `N(`DCACHE_MISS_WIDTH+1) remain_count; @@ -99,6 +109,7 @@ module DCacheMiss( logic `N(`XLEN) combine_cache_data; logic `N(`DCACHE_WAY_WIDTH) req_way; DirectoryState req_state; + logic req_replace_hit, req_owned, req_owned_after; //load enqueue // 有三种情况 @@ -220,8 +231,12 @@ endgenerate assign io.refill_state = req_state; assign 
io.refillWay = way[head]; assign io.refillAddr = {addr[head], {`DCACHE_BANK_WIDTH{1'b0}}, 2'b0}; + assign io.refillMask = req_mask | {`DCACHE_BANK*`DCACHE_BYTE{~req_owned_after}}; assign io.refillData = data[head]; assign io.refill_scIdx = scIdxs[head]; + assign io.refill_l2idx = l2_idxs[head]; + assign io.refill_write = wvalid[head]; + assign io.refill_replace_hit = req_replace_hit; Decoder #(`DCACHE_MISS_SIZE) decoder_head (head, head_decode); assign w_refill_eq = {`DCACHE_MISS_SIZE{data_refilled}} & whit & head_decode; @@ -307,6 +322,9 @@ endgenerate data_refilled <= 0; data_valid_all <= 0; wvalid <= 0; + replaceHit <= 0; + replaceHitWay <= 0; + wowned <= 0; end else begin head <= head + (io.refill_en & io.refill_valid); @@ -344,6 +362,9 @@ endgenerate en[freeIdx[`LOAD_PIPELINE]] <= 1'b1; addr[freeIdx[`LOAD_PIPELINE]] <= io.waddr`DCACHE_BLOCK_BUS; dataValid[freeIdx[`LOAD_PIPELINE]] <= 1'b0; + replaceHit[freeIdx[`LOAD_PIPELINE]] <= io.replaceHit; + replaceHitWay[freeIdx[`LOAD_PIPELINE]] <= io.replaceHitWay; + wowned[freeIdx[`LOAD_PIPELINE]] <= io.wowned; end if(io.wen & ~w_invalid & (write_remain_valid | whit_combine))begin scIdxs[widx] <= io.scIdx; @@ -353,7 +374,7 @@ endgenerate if(rlast)begin data_refilled <= 1'b1; end - if(req_last | r_axi_io.ar_valid & r_axi_io.ar_ready & req_valid_all)begin + if(req_last)begin dataValid[head] <= 1'b1; end if(req_next & io.req_success)begin @@ -365,6 +386,7 @@ endgenerate data_refilled <= 1'b0; data_valid_all[head] <= 1'b0; wvalid[head] <= 1'b0; + replaceHit[head] <= 1'b0; end if(refilled[mshr_head] & ~(mshr_hit_valid))begin refilled[mshr_head] <= 1'b0; @@ -387,7 +409,7 @@ endgenerate if(io.refill_en & io.refill_valid)begin amo[head] <= 1'b0; end - if(req_next & io.req_success)begin + if(req_next & io.req_success | replaceHit[head] & ~req_start)begin amo_req <= amo[head]; end end @@ -400,7 +422,7 @@ endgenerate `endif // req - assign io.req = en[head] & ~req_start; + assign io.req = en[head] & ~replaceHit[head] & ~req_start; 
assign io.req_addr = {addr[head], {`DCACHE_BANK_WIDTH{1'b0}}, 2'b0}; assign rlast = r_axi_io.r_valid & r_axi_io.r_last; @@ -426,10 +448,16 @@ endgenerate req_valid_all <= 0; req_wvalid <= 0; req_state <= 0; + l2_idxs <= 0; + req_replace_hit <= 1'b0; + req_owned <= 0; + req_owned_after <= 0; end else begin - if(io.req)begin + if(io.req | replaceHit[head] & ~req_start)begin req_start <= 1'b1; + req_replace_hit <= replaceHit[head]; + req_owned <= wowned[head]; end if(req_next & ~io.req_success)begin @@ -440,29 +468,37 @@ endgenerate req_start <= 1'b0; end - if(req_next & io.req_success)begin + if(req_next & io.req_success | replaceHit[head] & ~req_start)begin req_cache <= 1'b1; cache_addr <= io.req_addr; req_way <= io.replaceWay; req_valid_all <= data_valid_all[head]; req_wvalid <= wvalid[head]; - end - - if(req_next & io.req_success)begin - way[head] <= io.replaceWay; + way[head] <= replaceHit[head] ? replaceHitWay[head] : io.replaceWay; end if(r_axi_io.ar_valid & r_axi_io.ar_ready)begin req_cache <= 1'b0; end - if(r_axi_io.r_valid)begin + // MAKE_UNIQUE OR CLEAN_UNIQUE + if(r_axi_io.r_valid & ~(r_axi_io.r_last & (cacheIdx == 0)))begin cacheIdx <= cacheIdx + 1; end + if(r_axi_io.r_valid & (cacheIdx == 0))begin + l2_idxs[head] <= io.l2_idx; + end end - if(r_axi_io.r_valid)begin + if(r_axi_io.r_valid & ~(r_axi_io.r_last & (cacheIdx == 0)))begin cacheData[cacheIdx] <= r_axi_io.r_data; end + if(r_axi_io.r_valid & r_axi_io.r_last & (cacheIdx == 0) & req_owned & req_replace_hit)begin + req_owned_after <= 1'b1; + end + else if(r_axi_io.r_valid & r_axi_io.r_last)begin + req_owned_after <= 1'b0; + end + if(rlast)begin req_data <= data[head]; req_mask <= mask[head]; @@ -470,7 +506,7 @@ endgenerate `ifdef RVA | amo_req `endif - ? 3'b111 : r_axi_io.r_resp[4: 2]; + ? 3'b101 : r_axi_io.r_resp[4: 2]; end end @@ -486,6 +522,7 @@ endgenerate amo_req ? `ACEOP_READ_UNIQUE : `endif req_valid_all ? `ACEOP_MAKE_UNIQUE : + req_wvalid & req_replace_hit & req_owned ? 
`ACEOP_CLEAN_UNIQUE : req_wvalid ? `ACEOP_READ_UNIQUE : `ACEOP_READ_SHARED; assign r_axi_io.r_ready = 1'b1; diff --git a/src/core/backend/lsu/dcache/DCacheWay.sv b/src/core/backend/lsu/dcache/DCacheWay.sv index 03979e1..8b54e55 100644 --- a/src/core/backend/lsu/dcache/DCacheWay.sv +++ b/src/core/backend/lsu/dcache/DCacheWay.sv @@ -7,8 +7,8 @@ module DCacheData( input logic `N(`DCACHE_WAY) tag_we, input logic `N(`DCACHE_WAY) valid_we, input logic `ARRAY(`LOAD_PIPELINE+1, `DCACHE_SET_WIDTH) tagv_index, - input logic `ARRAY(`LOAD_PIPELINE+1, `DCACHE_SET_WIDTH) tagv_windex, - input logic `N(`DCACHE_TAG+1) tagv_wdata, + input logic `N(`DCACHE_SET_WIDTH) tagv_windex, + input logic `N(`DCACHE_TAG) tag_wdata, output logic `TENSOR(`LOAD_PIPELINE+1, `DCACHE_WAY, `DCACHE_TAG+1) tagv, output DCacheMeta `N(`DCACHE_WAY) meta, input DCacheMeta wmeta, @@ -21,6 +21,9 @@ module DCacheData( logic `TENSOR(`LOAD_PIPELINE+1, `DCACHE_WAY, `DCACHE_TAG) tag; logic `ARRAY(`LOAD_PIPELINE+1, `DCACHE_WAY) valid; generate + for(genvar i=0; i<`DCACHE_WAY; i++)begin + assign valid[`LOAD_PIPELINE][i] = meta[i].v; + end for(genvar i=0; i<`LOAD_PIPELINE+1; i++)begin for(genvar j=0; j<`DCACHE_WAY; j++)begin assign tagv[i][j][`DCACHE_TAG: 1] = tag[i][j]; @@ -38,10 +41,10 @@ generate .rst(rst), .rst_sync(0), .en(tagv_en[i]), - .addr(tagv_index[i]), + .addr(|tag_we ? 
tagv_windex : tagv_index[i]), .rdata(tag[i]), .we(tag_we), - .wdata({`DCACHE_WAY{tagv_wdata[`DCACHE_TAG: 1]}}), + .wdata({`DCACHE_WAY{tag_wdata}}), .ready() ); end @@ -61,9 +64,9 @@ generate .en(tagv_en[i]), .raddr(tagv_index[i]), .rdata(valid[i]), - .we(valid_we[i]), - .waddr(tagv_windex[i]), - .wdata({`DCACHE_WAY{tagv_wdata[0]}}), + .we(valid_we), + .waddr(tagv_windex), + .wdata({`DCACHE_WAY{wmeta.v}}), .ready() ); end @@ -82,8 +85,8 @@ generate .en(tagv_en[`LOAD_PIPELINE]), .raddr(tagv_index[`LOAD_PIPELINE]), .rdata(meta), - .we(valid_we[`LOAD_PIPELINE]), - .waddr(tagv_windex[`LOAD_PIPELINE]), + .we(valid_we), + .waddr(tagv_windex), .wdata({`DCACHE_WAY{wmeta}}), .ready() ); diff --git a/src/core/backend/lsu/dcache/ReplaceQueue.sv b/src/core/backend/lsu/dcache/ReplaceQueue.sv index e17f3ce..3b8bbda 100644 --- a/src/core/backend/lsu/dcache/ReplaceQueue.sv +++ b/src/core/backend/lsu/dcache/ReplaceQueue.sv @@ -4,6 +4,7 @@ interface ReplaceQueueIO; logic en; logic refill_en; DirectoryState refill_state; + logic entry_en; logic `N(`DCACHE_TAG+`DCACHE_SET_WIDTH) addr; logic `ARRAY(`DCACHE_BANK, `DCACHE_BITS) data; logic `N(`DCACHE_REPLACE_WIDTH) idx; @@ -21,7 +22,7 @@ interface ReplaceQueueIO; logic `N(`PADDR_SIZE) snoop_addr; logic `ARRAY(`DCACHE_BANK, `DCACHE_BITS) snoop_data; - modport queue (input en, refill_en, refill_state, addr, data, replace_idx, waddr, snoop_en, snoop_clean, snoop_addr, output idx, whit, full, snoop_data, snoop_hit, snoop_state); + modport queue (input en, refill_en, refill_state, entry_en, addr, data, replace_idx, waddr, snoop_en, snoop_clean, snoop_addr, output idx, whit, full, snoop_data, snoop_hit, snoop_state); modport miss (input full, idx, output replace_idx); endinterface @@ -33,10 +34,11 @@ module ReplaceQueue( ); localparam TRANSFER_BANK = `DCACHE_LINE / `DATA_BYTE; typedef struct packed { + logic en; logic `N(`DCACHE_TAG+`DCACHE_SET_WIDTH) addr; logic `ARRAY(TRANSFER_BANK, `XLEN) data; } ReplaceEntry; - typedef enum { IDLE, ADDRESS, 
WRITE, WAIT_B, RETIRE } ReplaceState; + typedef enum { IDLE, ADDRESS, WRITE, WAIT_B } ReplaceState; ReplaceState replace_state; ReplaceEntry entrys `N(`DCACHE_REPLACE_SIZE); @@ -47,10 +49,18 @@ module ReplaceQueue( logic full; logic `N(`DCACHE_REPLACE_SIZE) hit; logic `N(`PADDR_SIZE) waddr; - logic retire_last; ReplaceEntry newEntry; + logic aw_valid; + logic `N($clog2(TRANSFER_BANK)) widx; + logic wvalid; + logic wlast; + logic aw_dirty; + logic refill_invalid; + ReplaceEntry processEntry; + assign io.full = full; + assign newEntry.en = io.entry_en; assign newEntry.addr = io.addr; assign newEntry.data = io.data; PEncoder #(`DCACHE_REPLACE_SIZE) encoder_free_idx (~en, freeIdx); @@ -84,7 +94,7 @@ module ReplaceQueue( state[io.replace_idx] <= io.refill_state; end - if(retire_last)begin + if(w_axi_io.b_valid | refill_invalid)begin en[processIdx] <= 1'b0; dataValid[processIdx] <= 1'b0; prior[processIdx] <= 1'b0; @@ -96,7 +106,7 @@ module ReplaceQueue( assign waddr = io.snoop_en ? io.snoop_addr : io.waddr; generate for(genvar i=0; i<`DCACHE_REPLACE_SIZE; i++)begin - assign hit[i] = dataValid[i] & (waddr`DCACHE_BLOCK_BUS == entrys[i].addr); + assign hit[i] = dataValid[i] & entrys[i].en & (waddr`DCACHE_BLOCK_BUS == entrys[i].addr); end logic `N(`DCACHE_REPLACE_WIDTH) whit_idx; Encoder #(`DCACHE_REPLACE_SIZE) encoder_hit (hit, whit_idx); @@ -107,12 +117,6 @@ generate endgenerate // axi - logic aw_valid; - logic `N($clog2(TRANSFER_BANK)) widx; - logic wvalid; - logic wlast; - logic aw_dirty; - ReplaceEntry processEntry; assign valid = en & dataValid; assign prior_valid = en & dataValid & prior; @@ -128,15 +132,21 @@ endgenerate processEntry <= 0; processIdx <= 0; replace_state <= IDLE; - retire_last <= 1'b0; + refill_invalid <= 1'b0; end else begin case(replace_state) IDLE: begin if(|valid)begin - aw_valid <= 1'b1; aw_dirty <= state[processIdx_pre].dirty; - replace_state <= ADDRESS; + if (entrys[processIdx_pre].en)begin + replace_state <= ADDRESS; + aw_valid <= 1'b1; + end 
+ else begin + replace_state <= WAIT_B; + refill_invalid <= 1'b1; + end processEntry <= entrys[processIdx_pre]; processIdx <= processIdx_pre; end @@ -144,13 +154,8 @@ endgenerate ADDRESS: begin if(w_axi_io.aw_valid & w_axi_io.aw_ready)begin aw_valid <= 1'b0; - if(aw_dirty)begin - replace_state <= WRITE; - wvalid <= 1'b1; - end - else begin - replace_state <= RETIRE; - end + replace_state <= WRITE; + wvalid <= 1'b1; end end WRITE: begin @@ -169,17 +174,9 @@ endgenerate end end WAIT_B: begin - if(w_axi_io.b_valid)begin - replace_state <= RETIRE; - end - end - RETIRE: begin - if(retire_last)begin + if(w_axi_io.b_valid | refill_invalid)begin replace_state <= IDLE; - retire_last <= 1'b0; - end - else begin - retire_last <= 1'b1; + refill_invalid <= 1'b0; end end endcase @@ -231,7 +228,7 @@ endgenerate `ifdef DIFFTEST `LOG_ARRAY(T_DCACHE, dbg_data, newEntry.data, TRANSFER_BANK) - `Log(DLog::Debug, T_DCACHE, io.refill_en & io.refill_dirty, + `Log(DLog::Debug, T_DCACHE, io.refill_en & io.entry_en, $sformatf("dcache replace. 
[%h] %s", newEntry.addr << `DCACHE_LINE_WIDTH, dbg_data)) `endif endmodule \ No newline at end of file diff --git a/src/core/backend/lsu/dcache/dcache.sv b/src/core/backend/lsu/dcache/dcache.sv index 104d471..1570610 100644 --- a/src/core/backend/lsu/dcache/dcache.sv +++ b/src/core/backend/lsu/dcache/dcache.sv @@ -15,7 +15,7 @@ module DCache( logic `ARRAY(`LOAD_PIPELINE, `DCACHE_SET_WIDTH) loadIdx; logic `ARRAY(`LOAD_PIPELINE+1, `DCACHE_SET_WIDTH) tagvIdx; - logic `ARRAY(`LOAD_PIPELINE+1, `DCACHE_SET_WIDTH) tagvWIdx; + logic `N(`DCACHE_SET_WIDTH) tagvWIdx; logic `ARRAY(`LOAD_PIPELINE, `DCACHE_LINE_WIDTH-2) loadOffset; logic `ARRAY(`LOAD_PIPELINE, `DCACHE_BANK) loadBankDecode; logic `N(`DCACHE_BANK) loadBank; @@ -59,6 +59,8 @@ module DCache( logic `ARRAY(`DCACHE_BANK, `DCACHE_BITS) snoop_replace_data, snoop_data; logic `N(`DCACHE_BANK_WIDTH) snoop_data_idx; logic `N(`DCACHE_WAY) way_dirty; + logic snoop_rack; + logic `N(`L2MSHR_WIDTH) snoop_rid; `ifdef RVA logic amo_req, islr, issc, amo_req_n; @@ -123,18 +125,16 @@ endgenerate logic `N(`DCACHE_SET_WIDTH) refillIdx; logic `ARRAY(`DCACHE_BANK, `DCACHE_BITS) refillData; logic `TENSOR(`DCACHE_BANK, `DCACHE_WAY, `DCACHE_BITS) data_rdata; - logic `N(`DCACHE_WAY) dirty_we, dirty_wdata; generate // TODO: 每个way使用单独的index,refill和snoop不同way时可以同时写入 for(genvar i=0; i<`LOAD_PIPELINE; i++)begin assign tagv_en[i] = rio.req[i]; assign tagvIdx[i] = loadIdx[i]; - assign tagvWIdx[i] = snoop_invalid & (|w_wayhit) ? widx : miss_io.refillAddr`DCACHE_SET_BUS; end assign tagv_en[`LOAD_PIPELINE] = wreq | miss_io.refill_en; assign tagvIdx[`LOAD_PIPELINE] = wreq ? widx : miss_io.refillAddr`DCACHE_SET_BUS; - assign tagvWIdx[`LOAD_PIPELINE] = snoop_invalid & (|w_wayhit) ? widx : miss_io.refillAddr`DCACHE_SET_BUS; + assign tagvWIdx = snoop_invalid & (|w_wayhit) ? 
widx : miss_io.refillAddr`DCACHE_SET_BUS; // TODO: 只有原来不是share才需要写入(snoop_share) assign tag_we = {`DCACHE_WAY{miss_io.refill_valid & miss_io.refill_en}} & refill_way; assign valid_we = {`DCACHE_WAY{miss_io.refill_valid & miss_io.refill_en}} & refill_way | @@ -146,7 +146,7 @@ generate for(genvar i=0; i<`DCACHE_BANK; i++)begin for(genvar j=0; j<`DCACHE_WAY; j++)begin assign data_we[i][j] = {`DCACHE_BYTE{cache_wreq & cache_wway[j]}} & wmask_n[i] | - {`DCACHE_BYTE{miss_io.refill_valid & miss_io.refill_en & refill_way[j]}}; + {`DCACHE_BYTE{miss_io.refill_valid & miss_io.refill_en & refill_way[j]}} & miss_io.refillMask[i]; assign rdata[j][i] = data_rdata[i][j]; end assign data_index[i] = snoop_share & whit ? snoop_addr`DCACHE_SET_BUS : @@ -162,10 +162,6 @@ endgenerate assign refillIdx = cache_wreq & (|cache_wway) ? waddr_n`DCACHE_SET_BUS : miss_io.refillAddr`DCACHE_SET_BUS; assign refillData = cache_wreq & (|cache_wway) ? wdata_n : miss_io.refillData; - assign dirty_we = {`DCACHE_WAY{cache_wreq}} & cache_wway | - {`DCACHE_WAY{miss_io.refill_valid & miss_io.refill_en}} & refill_way; - assign dirty_wdata = ({`DCACHE_WAY{cache_wreq}} & cache_wway) | - ({`DCACHE_WAY{miss_io.refill_valid & miss_io.refill_en & miss_io.refill_dirty}} & refill_way); DCacheData cache_data ( .clk, @@ -175,7 +171,7 @@ endgenerate .valid_we, .tagv_index(tagvIdx), .tagv_windex(tagvWIdx), - .tagv_wdata({miss_io.refillAddr`DCACHE_TAG_BUS, 1'b1}), + .tag_wdata(miss_io.refillAddr`DCACHE_TAG_BUS), .tagv, .meta, .wmeta, @@ -343,6 +339,8 @@ endgenerate assign miss_io.wmask = wmask_n; assign miss_io.req_success = ~snoop_req & miss_req_n & ~replace_queue_io.full & ~replace_queue_io.whit; + assign miss_io.replaceHit = |w_wayhit; + assign miss_io.replaceHitWay = w_wayIdx; assign miss_io.replaceWay = missWay_encode; assign miss_io.scIdx = scIdx_n; assign wio.conflict = miss_io.wfull; @@ -363,18 +361,19 @@ endgenerate `endif ; always_ff @(posedge clk)begin - wio.refill <= miss_io.refill_en & miss_io.refill_valid 
& miss_io.refill_dirty; + wio.refill <= miss_io.refill_en & miss_io.refill_valid & miss_io.refill_write; wio.refillIdx <= miss_io.refill_scIdx; end // replace enqueue assign replace_addr = {wtagv[refill_way_n][`DCACHE_TAG: 1], refill_addr_n`DCACHE_SET_BUS}; always_ff @(posedge clk)begin - refill_en_n <= miss_io.refill_en & miss_io.refill_valid; + refill_en_n <= miss_io.refill_en & miss_io.refill_valid & ~miss_io.refill_replace_hit; refill_way_n <= snoop_req ? w_wayIdx : miss_io.refillWay; refill_addr_n <= miss_io.refillAddr; end assign replace_queue_io.refill_en = refill_en_n; + assign replace_queue_io.entry_en = meta[refill_way_n].v; assign replace_queue_io.refill_state = meta[refill_way_n].v ? meta[refill_way_n][2: 0] : 0; assign replace_queue_io.addr = replace_addr; assign replace_queue_io.data = rdata[refill_way_n]; @@ -389,6 +388,8 @@ endgenerate snoop_replace_hit <= snoop_req_n & replace_queue_io.snoop_hit; snoop_hit <= snoop_cache_hit | snoop_replace_hit; snoop_share_n <= snoop_share; + snoop_rack <= miss_io.refill_en & miss_io.refill_valid; + snoop_rid <= miss_io.refill_l2idx; end always_ff @(posedge clk, posedge rst)begin if (rst == `RST)begin @@ -412,7 +413,7 @@ endgenerate if(snoop_req_n)begin cr_valid <= 1'b1; - cr_state <= whit ? select_meta[2: 0] : replace_queue_io.snoop_state; + cr_state <= |w_wayhit ? 
select_meta[2: 0] : replace_queue_io.snoop_state; snoop_invalid_n <= snoop_invalid; end if(cr_valid & snoop_io.cr_ready)begin @@ -442,12 +443,15 @@ endgenerate assign replace_queue_io.snoop_en = snoop_req; assign replace_queue_io.snoop_clean = snoop_io.ac_snoop == `ACEOP_CLEAN_INVALID; assign replace_queue_io.snoop_addr = snoop_io.ac_addr; + assign miss_io.l2_idx = snoop_io.ar_snoop_id; assign snoop_io.ac_ready = ac_ready; assign snoop_io.cr_valid = cr_valid; assign snoop_io.cr_resp = {cr_state, 2'b00}; assign snoop_io.cd_valid = cd_valid; assign snoop_io.cd_data = snoop_data[snoop_data_idx]; assign snoop_io.cd_last = cd_last; + assign snoop_io.rack = snoop_rack; + assign snoop_io.r_snoop_id = snoop_rid; // amo @@ -514,7 +518,7 @@ endgenerate `LOG_ARRAY(T_DCACHE, dbg_refillData, miss_io.refillData, `DCACHE_BANK) `LOG_ARRAY(T_DCACHE, dbg_wdata, dbg_wdatan, `DCACHE_BANK) `Log(DLog::Debug, T_DCACHE, miss_io.refill_en & miss_io.refill_valid, - $sformatf("dcache refill. [%8h %d %b] %s", miss_io.refillAddr, miss_io.refillWay, miss_io.refill_dirty, dbg_refillData)) + $sformatf("dcache refill. [%8h %d %b] %s", miss_io.refillAddr, miss_io.refillWay, miss_io.refill_write, dbg_refillData)) `Log(DLog::Debug, T_DCACHE, wreq_n & whit, $sformatf("dcache write. 
[%h %d] %s", waddr_n, w_wayIdx, dbg_wdata)) `endif diff --git a/src/core/frontend/icache/Icache.sv b/src/core/frontend/icache/Icache.sv index e1b28e4..42d2e41 100644 --- a/src/core/frontend/icache/Icache.sv +++ b/src/core/frontend/icache/Icache.sv @@ -273,7 +273,7 @@ endgenerate assign axi_io.ar_burst = 2'b01; assign axi_io.ar_valid = main_state == MISS; assign axi_io.ar_user = 0; - assign axi_io.ar_snoop = `ACEOP_READ_SHARED; + assign axi_io.ar_snoop = `ACEOP_READ_ONCE; assign axi_io.r_ready = 1'b1; `define REQ_DEF \ diff --git a/src/core/mem/cache/Directory.sv b/src/core/mem/cache/Directory.sv index b005c7b..60a3951 100644 --- a/src/core/mem/cache/Directory.sv +++ b/src/core/mem/cache/Directory.sv @@ -75,7 +75,7 @@ module LocalDirectory #( generate for(genvar i=0; i snoop > read assign renq_en = ~full & ~free_conflict & slave_io.ar_valid; assign wenq_en = ~full & slave_io.aw_valid & ~write_waiting; assign snoop_enq_en = ~full & mst_snoop_req.ac_valid; + assign rw_conflict = slave_io.aw_valid & ~write_waiting & (slave_io.ar_addr[OFFSET_WIDTH +: SET_WIDTH] == slave_io.aw_addr[OFFSET_WIDTH +: SET_WIDTH]); + assign rsnoop_conflict = mst_snoop_req.ac_valid & (mst_snoop_req.ac.addr[OFFSET_WIDTH +: SET_WIDTH] == slave_io.ar_addr[OFFSET_WIDTH +: SET_WIDTH]); generate for(genvar i=0; i cmp[0] ? cmp[1] : cmp[0]; @@ -292,7 +298,8 @@ generate OldestSelect #( .RADIX(RADIX/2), .WIDTH(WIDTH), - .DATA_WIDTH(DATA_WIDTH) + .DATA_WIDTH(DATA_WIDTH), + .DIRECTION(DIRECTION) ) select1 ( .cmp(cmp[RADIX/2-1: 0]), .data_i(data_i[RADIX/2-1: 0]), @@ -302,7 +309,8 @@ generate OldestSelect #( .RADIX(RADIX-RADIX/2), .WIDTH(WIDTH), - .DATA_WIDTH(DATA_WIDTH) + .DATA_WIDTH(DATA_WIDTH), + .DIRECTION(DIRECTION) ) select2 ( .cmp(cmp[RADIX-1: RADIX/2]), .data_i(data_i[RADIX-1: RADIX/2]), @@ -311,7 +319,12 @@ generate ); if(WIDTH == 1)begin assign cmp_o = cmp1 | cmp2; - assign data_o = cmp2 ? data2 : data1; + if(DIRECTION == 1)begin + assign data_o = cmp1 ? 
data1 : data2; + end + else begin + assign data_o = cmp2 ? data2 : data1; + end end else begin assign cmp_o = cmp2 > cmp1 ? cmp2 : cmp1;