diff --git a/.vscode/settings.json b/.vscode/settings.json index e87fc35..1df3b1e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -17,7 +17,8 @@ "**/*" ], "todo-tree.filtering.useBuiltInExcludes": "file and search excludes", - "systemverilogFormatter.veribleBuild": "Ubuntu-20.04-focal-x86_64", + "systemverilogFormatter.veribleBuild": "none", "systemverilogFormatter.commandLineArguments": "--indentation_spaces 4", - "editor.formatOnSave": true + "editor.formatOnSave": true, + "verilog.ctags.path": "ctags" } \ No newline at end of file diff --git a/src/vsrc/core_config.sv b/src/vsrc/core_config.sv index 13a38d9..2606369 100644 --- a/src/vsrc/core_config.sv +++ b/src/vsrc/core_config.sv @@ -4,15 +4,19 @@ package core_config; + // Global parameters parameter ADDR_WIDTH = 32; parameter DATA_WIDTH = 32; - // Frontend Parameters parameter FETCH_WIDTH = 4; parameter ICACHELINE_WIDTH = 128; parameter FRONTEND_FTQ_SIZE = 8; + // ICache parameters + parameter ICACHE_NWAY = 2; + parameter ICACHE_NSET = 256; + // Commit Parameters parameter COMMIT_WIDTH = 2; diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index bc03ff0..53ad2ec 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -279,6 +279,7 @@ module cpu_top logic [1:0][`InstAddrBus] frontend_icache_addr; // ICache -> Frontend + logic [1:0]icache_frontend_rreq_ack; logic [1:0]icache_frontend_valid; logic [1:0][ICACHELINE_WIDTH-1:0] icache_frontend_data; @@ -291,28 +292,44 @@ module cpu_top logic [13:0] dispatch_csr_read_addr; logic [`RegBus] dispatch_csr_data; + logic icacop_op_en[2]; + logic icacop_ack; + logic [1:0] cacop_op_mode[2]; + icache u_icache( - .clk (clk ), - .rst (rst ), + .clk (clk ), + .rst (rst ), - // Port A - .rreq_1_i (frontend_icache_rreq[0]), - .raddr_1_i (frontend_icache_addr[0]), - .rvalid_1_o (icache_frontend_valid[0]), - .rdata_1_o (icache_frontend_data[0]), - // Port B - .rreq_2_i (frontend_icache_rreq[1]), - .raddr_2_i (frontend_icache_addr[1]), - .rvalid_2_o 
(icache_frontend_valid[1]), - .rdata_2_o (icache_frontend_data[1]), - - // <-> AXI Controller - .axi_addr_o (icache_axi_addr), - .axi_rreq_o (icache_axi_rreq), - .axi_rdy_i (axi_icache_rdy), - .axi_rvalid_i (axi_icache_rvalid), - .axi_rlast_i (), - .axi_data_i (axi_icache_data), + // Port A + .rreq_1_i (frontend_icache_rreq[0]), + .raddr_1_i (frontend_icache_addr[0]), + .rreq_1_ack_o (icache_frontend_rreq_ack[0]), + .rvalid_1_o (icache_frontend_valid[0]), + .rdata_1_o (icache_frontend_data[0]), + // Port B + .rreq_2_i (frontend_icache_rreq[1]), + .raddr_2_i (frontend_icache_addr[1]), + .rreq_2_ack_o (icache_frontend_rreq_ack[1]), + .rvalid_2_o (icache_frontend_valid[1]), + .rdata_2_o (icache_frontend_data[1]), + + // <-> AXI Controller + .axi_addr_o (icache_axi_addr), + .axi_rreq_o (icache_axi_rreq), + .axi_rdy_i (axi_icache_rdy), + .axi_rvalid_i (axi_icache_rvalid), + .axi_rlast_i (), + .axi_data_i (axi_icache_data), + + .frontend_uncache_i(1'b0), // no uncached-fetch source is wired up here: tie low instead of leaving the input floating + .invalid_i(1'b0), // no whole-cache invalidate source is wired up here: tie low so the ICache FSM never samples a floating request + + //-> CACOP + .cacop_i(icacop_op_en[0]), + .cacop_mode_i(cacop_op_mode[0]), + .cacop_addr_i({tlb_data_o.tag,tlb_data_o.index,tlb_data_o.offset}), + .cacop_ack_o(icacop_ack), + // TLB related .tlb_i(tlb_inst), // <- TLB @@ -338,6 +355,7 @@ module cpu_top // <-> ICache .icache_read_addr_o(frontend_icache_addr), // -> ICache .icache_read_req_o(frontend_icache_rreq), + .icache_rreq_ack_i(icache_frontend_rreq_ack), .icache_read_valid_i(icache_frontend_valid), // <- ICache .icache_read_data_i(icache_frontend_data), // <- ICache @@ -549,10 +567,11 @@ module cpu_top .excp_flush(excp_flush), .ertn_flush(ertn_flush), - // -> Cache - .icacop_op_en(icacop_op_en), + // <-> Cache + .icacop_op_en(icacop_op_en[i]), + .icacop_op_ack_i(icacop_ack), .dcacop_op_en(dcacop_op_en), - .cacop_op_mode(dicacop_op_mode), + .cacop_op_mode(cacop_op_mode[i]), // <-> Ctrl .stall({mem_stallreq[0] | mem_stallreq[1] ,stall[3]}), @@ -1065,10 +1084,10 @@ ila_1 ila_cpu_top ( .probe0(u_axi_master.inst_r_state), // input wire [3:0] probe0
.probe1(u_axi_master.data_r_state), // input wire [3:0] probe1 .probe2(u_axi_master.w_state), // input wire [3:0] probe2 - .probe3({26'b0,u_axi_master.dcache_rd_type_i,u_axi_master.s_arsize}), // input wire [31:0] probe3 - .probe4(u_axi_master.dcache_rd_req_i), // input wire [0:0] probe4 - .probe5(u_axi_master.dcache_ret_valid_o), // input wire [0:0] probe5 - .probe6(u_regfile.regs[4]), // input wire [31:0] probe6 + .probe3({21'b0,u_icache.hit,u_icache.miss_1, u_icache.miss_2,u_icache.cacop_i,u_axi_master.dcache_rd_type_i,u_axi_master.s_arsize}), // input wire [31:0] probe3 + .probe4(u_axi_master.icache_rd_req_i), // input wire [0:0] probe4 + .probe5(u_axi_master.icache_ret_valid_o), // input wire [0:0] probe5 + .probe6(u_icache.state), // input wire [31:0] probe6 .probe7(u_tlb.data_i.fetch), // input wire [0:0] probe7 .probe8(u_tlb.we), // input wire [0:0] probe8 .probe9(u_tlb.data_i.vaddr), // input wire [31:0] probe9 diff --git a/src/vsrc/frontend/frontend.sv b/src/vsrc/frontend/frontend.sv index 56daeec..f60698a 100644 --- a/src/vsrc/frontend/frontend.sv +++ b/src/vsrc/frontend/frontend.sv @@ -18,6 +18,7 @@ module frontend // ICache is fixed dual port output logic [1:0] icache_read_req_o, output logic [1:0][ADDR_WIDTH-1:0] icache_read_addr_o, + input logic [1:0] icache_rreq_ack_i, input logic [1:0] icache_read_valid_i, input logic [1:0][ICACHELINE_WIDTH-1:0] icache_read_data_i, @@ -141,10 +142,11 @@ module frontend .tlb_o(tlb_o), // <-> Frontend <-> ICache - .icache_rreq_o (icache_read_req_o), - .icache_raddr_o (icache_read_addr_o), + .icache_rreq_o(icache_read_req_o), + .icache_raddr_o(icache_read_addr_o), + .icache_rreq_ack_i(icache_rreq_ack_i), .icache_rvalid_i(icache_read_valid_i), - .icache_rdata_i (icache_read_data_i), + .icache_rdata_i(icache_read_data_i), // <-> Frontend <-> Instruction Buffer diff --git a/src/vsrc/frontend/ftq.sv b/src/vsrc/frontend/ftq.sv index 8bc0ece..240eb1b 100644 --- a/src/vsrc/frontend/ftq.sv +++ b/src/vsrc/frontend/ftq.sv @@ 
-87,7 +87,7 @@ module ftq next_FTQ = FTQ; // clear out if committed for (integer i = 0; i < COMMIT_WIDTH; i++) begin - if (backend_commit_i[i]) next_FTQ[comm_ptr+i] = 0; + if (i < backend_commit_num) next_FTQ[comm_ptr+i] = 0; end // Accept BPU input if (bpu_i.valid) next_FTQ[bpu_ptr] = bpu_i; diff --git a/src/vsrc/frontend/ifu.sv b/src/vsrc/frontend/ifu.sv index 33c02b5..7bc8cc3 100644 --- a/src/vsrc/frontend/ifu.sv +++ b/src/vsrc/frontend/ifu.sv @@ -31,6 +31,7 @@ module ifu // <-> Frontend <-> ICache output logic [1:0] icache_rreq_o, output logic [1:0][ADDR_WIDTH-1:0] icache_raddr_o, + input logic [1:0] icache_rreq_ack_i, input logic [1:0] icache_rvalid_i, input logic [1:0][ICACHELINE_WIDTH-1:0] icache_rdata_i, @@ -42,8 +43,9 @@ module ifu // P0 signal logic p0_send_rreq, p0_send_rreq_delay1; // P1 signal + logic p1_rreq_ack; logic p1_read_done; // Read done is same cycle as ICache return valid - logic p1_stallreq; // Currently in transaction and not done yet + logic p1_in_transaction; // Currently in transaction and not done yet // Flush state logic is_flushing_r, is_flushing; @@ -51,8 +53,8 @@ module ifu // P0, send read req to ICache & TLB ///////////////////////////////////////////////////////////////////////////////// // Condition when to send rreq to ICache, see doc for detail - assign p0_send_rreq = ftq_i.valid & ~is_flushing & ~stallreq_i & ~p1_stallreq; - assign ftq_accept_o = p0_send_rreq; // FTQ handshake, same cycle as ftq_i + assign p0_send_rreq = ftq_i.valid & ~is_flushing & ~stallreq_i & ~p1_in_transaction; + assign ftq_accept_o = p0_send_rreq; // FTQ handshake, same cycle as ftq_i, FTQ can move to next block always_ff @(posedge clk) begin p0_send_rreq_delay1 <= p0_send_rreq; end @@ -65,10 +67,6 @@ module ifu logic dmw0_en, dmw1_en; assign dmw0_en = ((csr_i.dmw0[`PLV0] && csr_i.plv == 2'd0) || (csr_i.dmw0[`PLV3] && csr_i.plv == 2'd3)) && (p0_pc[31:29] == csr_i.dmw0[`VSEG]); // Direct map window 0 assign dmw1_en = ((csr_i.dmw1[`PLV0] && csr_i.plv == 
2'd0) || (csr_i.dmw1[`PLV3] && csr_i.plv == 2'd3)) && (p0_pc[31:29] == csr_i.dmw1[`VSEG]); // Direct map window 1 - assign tlb_o.dmw0_en = dmw0_en; - assign tlb_o.dmw1_en = dmw1_en; - assign tlb_o.trans_en = csr_i.pg && !csr_i.da && !dmw0_en && !dmw1_en; // Not in direct map windows, enable paging - assign tlb_o.vaddr = p0_pc; // Send read req to ICache & TLB always_comb begin @@ -80,10 +78,22 @@ module ifu icache_raddr_o[1] = ftq_i.is_cross_cacheline ? {ftq_i.start_pc[ADDR_WIDTH-1:4], 4'b0} + 16 : 0; // TODO: remove magic number // Send req to TLB tlb_o.fetch = 1; + tlb_o.dmw0_en = dmw0_en; + tlb_o.dmw1_en = dmw1_en; + tlb_o.trans_en = csr_i.pg && !csr_i.da && !dmw0_en && !dmw1_en; // Not in direct map windows, enable paging + tlb_o.vaddr = p0_pc; + end else if (p1_in_transaction) begin + // Or P1 is in transaction + icache_rreq_o[0] = 1; + icache_rreq_o[1] = p1_read_transaction.is_cross_cacheline ? 1 : 0; + icache_raddr_o[0] = {p1_read_transaction.start_pc[ADDR_WIDTH-1:4], 4'b0}; + icache_raddr_o[1] = p1_read_transaction.is_cross_cacheline ? {p1_read_transaction.start_pc[ADDR_WIDTH-1:4], 4'b0} + 16 : 0; // TODO: remove magic number + // Hold output to TLB + tlb_o = p1_read_transaction.tlb_rreq; end else begin icache_rreq_o = 0; icache_raddr_o = 0; - tlb_o.fetch = 0; + tlb_o = 0; end end @@ -115,6 +125,7 @@ module ifu logic [`InstAddrBus] start_pc; logic is_cross_cacheline; logic [$clog2(`FETCH_WIDTH+1)-1:0] length; + logic [1:0] icache_rreq_ack_r; logic [1:0] icache_rvalid_r; logic [1:0][ICACHELINE_WIDTH-1:0] icache_rdata_r; logic [$clog2(FRONTEND_FTQ_SIZE)-1:0] ftq_id; @@ -126,10 +137,13 @@ module ifu logic [ADDR_WIDTH-1:0] p1_pc; assign p1_pc = p1_read_transaction.start_pc; + assign p1_in_transaction = p1_read_transaction.valid & ~p1_read_done; assign p1_read_done = p1_read_transaction.is_cross_cacheline ? 
- (icache_rvalid_i[0] | p1_read_transaction.icache_rvalid_r[0]) & (icache_rvalid_i[1]| p1_read_transaction.icache_rvalid_r[1]) : - (icache_rvalid_i[0] | p1_read_transaction.icache_rvalid_r[0]); - assign p1_stallreq = p1_read_transaction.valid & ~p1_read_done; + (icache_rvalid_i[0] | p1_read_transaction.icache_rvalid_r[0]) & (icache_rvalid_i[1] | p1_read_transaction.icache_rvalid_r[1]) : + (icache_rvalid_i[0] | p1_read_transaction.icache_rvalid_r[0]); + assign p1_rreq_ack = p1_read_transaction.is_cross_cacheline ? + (icache_rreq_ack_i[0] | p1_read_transaction.icache_rreq_ack_r[0]) & (icache_rreq_ack_i[1] | p1_read_transaction.icache_rreq_ack_r[1]) : + (icache_rreq_ack_i[0] | p1_read_transaction.icache_rreq_ack_r[0]); always_ff @(posedge clk) begin : p1_ff if (rst) begin p1_read_transaction <= 0; @@ -139,6 +153,7 @@ module ifu p1_read_transaction.start_pc <= ftq_i.start_pc; p1_read_transaction.is_cross_cacheline <= ftq_i.is_cross_cacheline; p1_read_transaction.length <= ftq_i.length; + p1_read_transaction.icache_rreq_ack_r <= icache_rreq_ack_i; p1_read_transaction.icache_rvalid_r <= 0; p1_read_transaction.icache_rdata_r <= 0; p1_read_transaction.ftq_id <= ftq_id_i; @@ -158,6 +173,9 @@ module ifu p1_read_transaction.icache_rvalid_r[1] <= 1; p1_read_transaction.icache_rdata_r[1] <= icache_rdata_i[1]; end + // Store ACK in P1 data structure: one sticky bit per ICache port, set individually so one port's ACK never clears the other's + if (icache_rreq_ack_i[0]) p1_read_transaction.icache_rreq_ack_r[0] <= 1; + if (icache_rreq_ack_i[1]) p1_read_transaction.icache_rreq_ack_r[1] <= 1; end end @@ -235,10 +253,5 @@ module ifu end end - // P2 Debug - logic debug_p2_tlb_trans_en; - logic [1:0] debug_p2_csr_plv; - assign debug_p2_tlb_trans_en = p1_read_transaction.tlb_rreq.trans_en; - assign debug_p2_csr_plv = p1_read_transaction.csr.plv; endmodule diff --git a/src/vsrc/icache.sv b/src/vsrc/icache.sv index 6a05cea..1a15f15 100644 --- a/src/vsrc/icache.sv +++ b/src/vsrc/icache.sv @@ -3,28 +3,34 @@ `include "utils/bram.sv" `include "utils/lfsr.sv" +// 1 as valid + module icache
import core_config::*; import tlb_types::*; -#( - parameter NSET = 256, - parameter NWAY = 2 -) ( +( input logic clk, input logic rst, // Read port 1 input logic rreq_1_i, input logic [ADDR_WIDTH-1:0] raddr_1_i, + output logic rreq_1_ack_o, output logic rvalid_1_o, output logic [ICACHELINE_WIDTH-1:0] rdata_1_o, // Read port 2 input logic rreq_2_i, input logic [ADDR_WIDTH-1:0] raddr_2_i, + output logic rreq_2_ack_o, output logic rvalid_2_o, output logic [ICACHELINE_WIDTH-1:0] rdata_2_o, + // Frontend Uncache + // DATF or DMW generated uncache + input logic frontend_uncache_i, + input logic invalid_i, // For some reason, frontend want to reset whole ICache + // <-> AXI Controller output logic [ADDR_WIDTH-1:0] axi_addr_o, output logic axi_rreq_o, @@ -34,12 +40,10 @@ module icache input logic [ICACHELINE_WIDTH-1:0] axi_data_i, // CACOP - input logic uncache_en, - input logic icacop_op_en, - input logic [1:0] cacop_op_mode, - input logic [7:0] cacop_op_addr_index, - input logic [19:0] cacop_op_addr_tag, - input logic [3:0] cacop_op_addr_offset, + input logic cacop_i, + input logic [1:0] cacop_mode_i, + input logic [ADDR_WIDTH-1:0] cacop_addr_i, + output cacop_ack_o, // TLB related input tlb_inst_t tlb_i, // <- TLB @@ -49,24 +53,36 @@ module icache // Reset signal logic rst_n; assign rst_n = ~rst; - // Indicates an entry is valid - logic valid_flag; + // Parameters + localparam NWAY = ICACHE_NWAY; + localparam NSET = ICACHE_NSET; - // Refill states + // States enum int { IDLE, REFILL_1_REQ, REFILL_1_WAIT, REFILL_2_REQ, - REFILL_2_WAIT + REFILL_2_WAIT, + INVALID, // Reset all tag ram to 0 + CACOP_INVALID_1, + CACOP_INVALID_2 } state, next_state; + // Invalid controls + logic [$clog2(NSET)-1:0] invalid_cnt; + logic invalid_done; + + // Random number generator + logic [2:0] random_r; + // BRAM signals logic [NWAY-1:0][1:0][ICACHELINE_WIDTH-1:0] data_bram_rdata; logic [NWAY-1:0][1:0][ICACHELINE_WIDTH-1:0] data_bram_wdata; logic [NWAY-1:0][1:0][$clog2(NSET)-1:0] 
data_bram_addr; logic [NWAY-1:0][1:0] data_bram_we; + logic [NWAY-1:0][1:0] data_bram_en; // Tag bram // {1bit valid, 20bits tag} @@ -75,146 +91,258 @@ module icache logic [NWAY-1:0][1:0][TAG_BRAM_WIDTH-1:0] tag_bram_wdata; logic [NWAY-1:0][1:0][$clog2(NSET)-1:0] tag_bram_addr; logic [NWAY-1:0][1:0] tag_bram_we; + logic [NWAY-1:0][1:0] tag_bram_en; // P1 signal logic miss_1_pulse, miss_2_pulse, miss_1_r, miss_2_r, miss_1, miss_2; // miss signal logic p1_rreq_1, p1_rreq_2; // P1 rreq reg logic [ADDR_WIDTH-1:0] p1_raddr_1, p1_raddr_2; // P1 raddr reg logic p1_tlb_miss; // P1 TLB result miss - // rvalid_1 & 2 is only valid when state == IDLE or (miss_1 | (state == REFILL_1_WAIT & ~rvalid_1)) - logic rvalid_1, rvalid_2; + logic [1:0] hit; logic [NWAY-1:0][1:0] tag_hit; // P1 hit signal + logic axi_rvalid_delay_1; + + // CACOP + logic cacop_op_mode0, cacop_op_mode1, cacop_op_mode2; + logic [$clog2(NWAY)-1:0] cacop_way; + logic [$clog2(NSET)-1:0] cacop_index; + // AXI rvalid delay + always_ff @(posedge clk) begin + axi_rvalid_delay_1 <= axi_rvalid_i; + end + // Invalid control + assign invalid_done = invalid_cnt == {$clog2(NSET) {1'b1}}; + always_ff @(posedge clk) begin + if (rst) invalid_cnt <= 0; + else if (state == IDLE) invalid_cnt <= 0; + else if (state == INVALID) invalid_cnt <= invalid_cnt + 1; + end + // CACOP control + assign cacop_ack_o = state == CACOP_INVALID_2; + assign cacop_op_mode0 = cacop_i & cacop_mode_i == 2'b00; + assign cacop_op_mode1 = cacop_i & cacop_mode_i == 2'b01; + assign cacop_op_mode2 = cacop_i & cacop_mode_i == 2'b10; + assign cacop_way = cacop_addr_i[$clog2(NWAY)-1:0]; + assign cacop_index = cacop_addr_i[$clog2( + ICACHELINE_WIDTH / 8 + )+$clog2( + NSET + )-1:$clog2( + ICACHELINE_WIDTH / 8 + )]; // set index sits directly above the byte offset; ICACHELINE_WIDTH is in bits, so divide by 8 to match the [11:4] index used for BRAM addressing - // valid flag - always_ff @(posedge clk or negedge rst_n) begin - // Flip valid bit if reset of receive a ICache invalid instruction - if (!rst_n) valid_flag <= ~valid_flag; + // State machine + always_ff @(posedge clk) begin + if (rst) begin + state <=
INVALID; // Reset into invalid + end else begin + state <= next_state; + end end + always_comb begin : transition_comb + case (state) + IDLE: begin + if (invalid_i) next_state = INVALID; + else if (cacop_i) next_state = CACOP_INVALID_1; + else if (miss_1) next_state = REFILL_1_REQ; + else if (miss_2) next_state = REFILL_2_REQ; + else next_state = IDLE; + end + REFILL_1_REQ: begin + if (axi_rdy_i) next_state = REFILL_1_WAIT; + else next_state = REFILL_1_REQ; + end + REFILL_2_REQ: begin + if (axi_rdy_i) next_state = REFILL_2_WAIT; + else next_state = REFILL_2_REQ; + end + REFILL_1_WAIT: begin + if (hit[0]) begin + if (miss_2) next_state = REFILL_2_REQ; + else next_state = IDLE; + end else next_state = REFILL_1_WAIT; + end + REFILL_2_WAIT: begin + if (hit[1]) next_state = IDLE; + else next_state = REFILL_2_WAIT; + end + INVALID: begin + if (invalid_done) next_state = IDLE; + else next_state = INVALID; + end + CACOP_INVALID_1: next_state = CACOP_INVALID_2; + CACOP_INVALID_2: next_state = IDLE; + default: begin + next_state = IDLE; + end + endcase + end + + ///////////////////////////////////////////////// // PO, query BRAM //////////////////////////////////////////////// - logic [ 3:0] real_offset; - logic [19:0] real_tag; - logic [ 7:0] real_index; - assign real_offset = icacop_op_en ? cacop_op_addr_offset : p1_tlb.offset; - assign real_tag = icacop_op_en ? cacop_op_addr_tag : p1_tlb.tag; - assign real_index = icacop_op_en ? 
cacop_op_addr_index : p1_tlb.index; - - // BRAM - generate - for (genvar i = 0; i < NWAY; i++) begin : tag_bram + // RREQ ack + // hit or next_state is REQ + // if next_state is not REQ, means something important is going on, do not accept rreq + assign rreq_1_ack_o = state == IDLE & rreq_1_i & (~miss_1 | next_state == REFILL_1_REQ); + assign rreq_2_ack_o = state == IDLE & rreq_2_i & (~miss_2 | next_state == REFILL_2_REQ); -`ifdef BRAM_IP - bram_icache_tag_ram u_bram ( - .clka (clk), - .clkb (clk), - .wea (tag_bram_we[i][0]), - .web (tag_bram_we[i][1]), - .dina (tag_bram_wdata[i][0]), - .addra(tag_bram_addr[i][0]), - .douta(tag_bram_rdata[i][0]), - .dinb (tag_bram_wdata[i][1]), - .addrb(tag_bram_addr[i][1]), - .doutb(tag_bram_rdata[i][1]) - ); -`else - - bram #( - .DATA_WIDTH (TAG_BRAM_WIDTH), - .DATA_DEPTH_EXP2(10) - ) u_bram ( - .clk (clk), - .wea (tag_bram_we[i][0]), - .web (tag_bram_we[i][1]), - .dina (tag_bram_wdata[i][0]), - .addra(tag_bram_addr[i][0]), - .douta(tag_bram_rdata[i][0]), - .dinb (tag_bram_wdata[i][1]), - .addrb(tag_bram_addr[i][1]), - .doutb(tag_bram_rdata[i][1]) - ); -`endif - end - endgenerate - generate - for (genvar i = 0; i < NWAY; i++) begin : data_bram -`ifdef BRAM_IP - bram_icache_data_ram u_bram ( - .clka (clk), - .clkb (clk), - .wea (data_bram_we[i][0]), - .web (data_bram_we[i][1]), - .dina (data_bram_wdata[i][0]), - .addra(data_bram_addr[i][0]), - .douta(data_bram_rdata[i][0]), - .dinb (data_bram_wdata[i][1]), - .addrb(data_bram_addr[i][1]), - .doutb(data_bram_rdata[i][1]) - ); -`else - bram #( - .DATA_WIDTH (128), - .DATA_DEPTH_EXP2(10) - ) u_bram ( - .clk (clk), - .wea (data_bram_we[i][0]), - .web (data_bram_we[i][1]), - .dina (data_bram_wdata[i][0]), - .addra(data_bram_addr[i][0]), - .douta(data_bram_rdata[i][0]), - .dinb (data_bram_wdata[i][1]), - .addrb(data_bram_addr[i][1]), - .doutb(data_bram_rdata[i][1]) - ); -`endif - end - endgenerate - - // BRAM index gen + // BRAM input signals + // Addr & EN always_comb begin : 
bram_addr_gen + // Default all 0 for (integer i = 0; i < NWAY; i++) begin - if (miss_1 | (state == REFILL_1_WAIT & ~rvalid_1)) begin - tag_bram_addr[i][0] = p1_raddr_1[11:4]; - data_bram_addr[i][0] = p1_raddr_1[11:4]; - end else if (rreq_1_i) begin - tag_bram_addr[i][0] = raddr_1_i[11:4]; - data_bram_addr[i][0] = raddr_1_i[11:4]; - end else begin - tag_bram_addr[i][0] = 0; - data_bram_addr[i][0] = 0; - end + tag_bram_addr[i][0] = 0; + data_bram_addr[i][0] = 0; + tag_bram_addr[i][1] = 0; + data_bram_addr[i][1] = 0; + tag_bram_en[i] = 0; + data_bram_en[i] = 0; end - for (integer i = 0; i < NWAY; i++) begin - if (miss_2 | (state == REFILL_2_WAIT & ~rvalid_2)) begin - tag_bram_addr[i][1] = p1_raddr_2[11:4]; - data_bram_addr[i][1] = p1_raddr_2[11:4]; - end else if (rreq_2_i) begin - tag_bram_addr[i][1] = raddr_2_i[11:4]; - data_bram_addr[i][1] = raddr_2_i[11:4]; - end else begin - tag_bram_addr[i][1] = 0; - data_bram_addr[i][1] = 0; + case (state) + IDLE: begin + for (integer i = 0; i < NWAY; i++) begin + // Port 1 + if (rreq_1_i) begin + tag_bram_en[i][0] = 1; + data_bram_en[i][0] = 1; + tag_bram_addr[i][0] = raddr_1_i[11:4]; + data_bram_addr[i][0] = raddr_1_i[11:4]; + end else if (miss_1) begin + tag_bram_addr[i][0] = p1_raddr_1[11:4]; + data_bram_addr[i][0] = p1_raddr_1[11:4]; + end + // Port 2 + if (rreq_2_i) begin + tag_bram_en[i][1] = 1; + data_bram_en[i][1] = 1; + tag_bram_addr[i][1] = raddr_2_i[11:4]; + data_bram_addr[i][1] = raddr_2_i[11:4]; + end else begin + tag_bram_addr[i][1] = p1_raddr_2[11:4]; + data_bram_addr[i][1] = p1_raddr_2[11:4]; + end + end + end + REFILL_1_REQ, REFILL_2_REQ, REFILL_1_WAIT, REFILL_2_WAIT: begin + for (integer i = 0; i < NWAY; i++) begin + tag_bram_en[i][0] = axi_rvalid_delay_1 | axi_rvalid_i; + tag_bram_en[i][1] = axi_rvalid_delay_1 | axi_rvalid_i; + data_bram_en[i][0] = axi_rvalid_delay_1 | axi_rvalid_i; + data_bram_en[i][1] = axi_rvalid_delay_1 | axi_rvalid_i; + // Port 1 + if (miss_1) begin + tag_bram_addr[i][0] = 
p1_raddr_1[11:4]; + data_bram_addr[i][0] = p1_raddr_1[11:4]; + end else begin + tag_bram_addr[i][0] = raddr_1_i[11:4]; + data_bram_addr[i][0] = raddr_1_i[11:4]; + end + // Port 2 + if (miss_2) begin + tag_bram_addr[i][1] = p1_raddr_2[11:4]; + data_bram_addr[i][1] = p1_raddr_2[11:4]; + end else begin + tag_bram_addr[i][1] = raddr_2_i[11:4]; + data_bram_addr[i][1] = raddr_2_i[11:4]; + end + end end + CACOP_INVALID_1: begin + for (integer i = 0; i < NWAY; i++) begin + tag_bram_en[i][0] = 0; + tag_bram_en[i][1] = 0; + data_bram_en[i][0] = 0; + data_bram_en[i][1] = 0; + if (cacop_way == i) begin + tag_bram_addr[i][1] = cacop_index; + tag_bram_en[i][1] = 1; + end + end + end + INVALID: begin + for (integer i = 0; i < NWAY; i++) begin + tag_bram_en[i][1] = 0; + data_bram_en[i][0] = 0; + data_bram_en[i][1] = 0; + tag_bram_addr[i][0] = invalid_cnt; + tag_bram_en[i][0] = 1; + end + end + default: begin + end + endcase + end + + // Data + always_comb begin : bram_data_gen + for (integer i = 0; i < NWAY; i++) begin + tag_bram_we[i] = 0; + tag_bram_wdata[i] = 0; + data_bram_we[i] = 0; + data_bram_wdata[i] = 0; end + case (state) + REFILL_1_WAIT: begin + for (integer i = 0; i < NWAY; i++) begin + if (i[0] == random_r[0]) begin + if (axi_rvalid_i) begin + tag_bram_we[i][0] = 1; + tag_bram_wdata[i][0] = {1'b1, p1_tlb.tag}; + data_bram_we[i][0] = 1; + data_bram_wdata[i][0] = axi_data_i; + end + end + end + end + REFILL_2_WAIT: begin + for (integer i = 0; i < NWAY; i++) begin + if (i[0] == random_r[0]) begin + if (axi_rvalid_i) begin + tag_bram_we[i][1] = 1; + tag_bram_wdata[i][1] = {1'b1, p1_tlb.tag}; + data_bram_we[i][1] = 1; + data_bram_wdata[i][1] = axi_data_i; + end + end + end + end + CACOP_INVALID_1: begin + for (integer i = 0; i < NWAY; i++) begin + if (cacop_way == i) begin + tag_bram_we[i][1] = 1; + tag_bram_wdata[i][1] = 0; + end + end + end + INVALID: begin + for (integer i = 0; i < NWAY; i++) begin + tag_bram_we[i][0] = 1; + tag_bram_wdata[i][0] = 0; + end + end + 
endcase + end //////////////////////////////////////////////////// - // P1, output gen + // P1 /////////////////////////////////////////////////// - // Generate miss signal - assign miss_1_pulse = p1_rreq_1 & ~rvalid_1 & (state == IDLE); - assign miss_2_pulse = p1_rreq_2 & ~rvalid_2 & (state == IDLE); + // Miss signal + assign miss_1_pulse = p1_rreq_1 & ~hit[0] & (state == IDLE) & ~p1_tlb_miss; + assign miss_2_pulse = p1_rreq_2 & ~hit[1] & (state == IDLE) & ~p1_tlb_miss; assign miss_1 = miss_1_pulse | miss_1_r; assign miss_2 = miss_2_pulse | miss_2_r; always_ff @(posedge clk) begin @@ -230,16 +358,17 @@ module icache end end REFILL_1_WAIT: begin - if (axi_rvalid_i) miss_1_r <= 0; + if (axi_rvalid_delay_1) miss_1_r <= 0; end REFILL_2_WAIT: begin - if (axi_rvalid_i) miss_2_r <= 0; + if (axi_rvalid_delay_1) miss_2_r <= 0; end default: begin end endcase end end + // TLB miss inst_tlb_t p1_tlb_rreq; always_ff @(posedge clk) begin @@ -258,98 +387,63 @@ module icache end end always_ff @(posedge clk) begin - if (rvalid_1_o | ~p1_rreq_1) begin + if (rreq_1_ack_o) begin p1_rreq_1 <= rreq_1_i; p1_raddr_1 <= raddr_1_i; + end else if (rvalid_1_o) begin + p1_rreq_1 <= 0; + p1_raddr_1 <= 0; end - if (rvalid_2_o | ~p1_rreq_2) begin + if (rreq_2_ack_o) begin p1_rreq_2 <= rreq_2_i; p1_raddr_2 <= raddr_2_i; + end else if (rvalid_2_o) begin + p1_rreq_2 <= 0; + p1_raddr_2 <= 0; end end // Hit signal always_comb begin for (integer i = 0; i < NWAY; i++) begin - tag_hit[i][0] = tag_bram_rdata[i][0][19:0] == p1_tlb.tag && tag_bram_rdata[i][0][20] == valid_flag; - tag_hit[i][1] = tag_bram_rdata[i][1][19:0] == p1_tlb.tag && tag_bram_rdata[i][1][20] == valid_flag; + tag_hit[i][0] = tag_bram_rdata[i][0][19:0] == p1_tlb.tag && tag_bram_rdata[i][0][20]; + tag_hit[i][1] = tag_bram_rdata[i][1][19:0] == p1_tlb.tag && tag_bram_rdata[i][1][20]; end end // IDLE & WAIT can return rvalid_o, but REQ must not return rvalid_o - assign rvalid_1_o = (rvalid_1 && p1_rreq_1 && state != REFILL_1_REQ) | 
(p1_tlb_miss && state == IDLE); - assign rvalid_2_o = (rvalid_2 && p1_rreq_2 && state != REFILL_2_REQ) | (p1_tlb_miss && state == IDLE); + assign rvalid_1_o = (hit[0] && p1_rreq_1 && state != REFILL_1_REQ && state != CACOP_INVALID_1) | (p1_tlb_miss && state == IDLE); + assign rvalid_2_o = (hit[1] && p1_rreq_2 && state != REFILL_2_REQ && state != CACOP_INVALID_1) | (p1_tlb_miss && state == IDLE); // Generate read output always_comb begin - rvalid_1 = 0; + hit[0] = 0; rdata_1_o = 0; - rvalid_2 = 0; + hit[1] = 0; rdata_2_o = 0; for (integer i = 0; i < NWAY; i++) begin if (tag_hit[i][0]) begin - rvalid_1 = 1; + hit[0] = 1; rdata_1_o = data_bram_rdata[i][0]; end if (tag_hit[i][1]) begin - rvalid_2 = 1; + hit[1] = 1; rdata_2_o = data_bram_rdata[i][1]; end end end - // Refill state machine - always_ff @(posedge clk) begin - if (rst) begin - state <= IDLE; - end else begin - state <= next_state; - end - end - - always_comb begin : transition_comb - case (state) - IDLE: begin - if (p1_tlb_miss) next_state = IDLE; // No refilling if TLB miss - else if (miss_1) next_state = REFILL_1_REQ; - else if (miss_2) next_state = REFILL_2_REQ; - else next_state = IDLE; - end - REFILL_1_REQ: begin - if (axi_rdy_i) next_state = REFILL_1_WAIT; - else next_state = REFILL_1_REQ; - end - REFILL_2_REQ: begin - if (axi_rdy_i) next_state = REFILL_2_WAIT; - else next_state = REFILL_2_REQ; - end - REFILL_1_WAIT: begin - if (rvalid_1) begin - if (miss_2) next_state = REFILL_2_REQ; - else next_state = IDLE; - end else next_state = REFILL_1_WAIT; - end - REFILL_2_WAIT: begin - if (rvalid_2) begin - next_state = IDLE; - end else next_state = REFILL_2_WAIT; - end - default: begin - next_state = IDLE; - end - endcase - end - + // AXI handshake // Read request to AXI Controller // Use result from TLB always_comb begin case (state) REFILL_1_REQ, REFILL_1_WAIT: begin - axi_rreq_o = (miss_1 ? 1 : 0) & ~axi_rvalid_i; - axi_addr_o = miss_1 ? 
{real_tag, p1_raddr_1[11:0]} : 0; + axi_rreq_o = miss_1 & ~axi_rvalid_i & ~axi_rvalid_delay_1; + axi_addr_o = miss_1 ? {p1_tlb.tag, p1_raddr_1[11:0]} : 0; end REFILL_2_REQ, REFILL_2_WAIT: begin - axi_rreq_o = (miss_2 ? 1 : 0) & ~axi_rvalid_i; - axi_addr_o = miss_2 ? {real_tag, p1_raddr_2[11:0]} : 0; + axi_rreq_o = miss_2 & ~axi_rvalid_i & ~axi_rvalid_delay_1; + axi_addr_o = miss_2 ? {p1_tlb.tag, p1_raddr_2[11:0]} : 0; end default: begin axi_rreq_o = 0; @@ -358,8 +452,7 @@ module icache endcase end - // Refill write BRAM - logic [2:0] random_r; + // LFSR lfsr #( .WIDTH(3) ) u_lfsr ( @@ -368,28 +461,75 @@ module icache .en (1'b1), .value(random_r) ); - always_comb begin - for (integer i = 0; i < NWAY; i++) begin - tag_bram_we[i] = 0; - tag_bram_wdata[i] = 0; - data_bram_we[i] = 0; - data_bram_wdata[i] = 0; - if (i[0] == random_r[0]) begin - // write this way - if (state == REFILL_1_WAIT && axi_rvalid_i) begin - tag_bram_we[i][0] = 1; - tag_bram_wdata[i][0] = {valid_flag, p1_tlb.tag}; - data_bram_we[i][0] = 1; - data_bram_wdata[i][0] = axi_data_i; - end - if (state == REFILL_2_WAIT && axi_rvalid_i) begin - tag_bram_we[i][1] = 1; - tag_bram_wdata[i][1] = {valid_flag, p1_tlb.tag}; - data_bram_we[i][1] = 1; - data_bram_wdata[i][1] = axi_data_i; - end - end + + // BRAM instantiation + generate + for (genvar i = 0; i < NWAY; i++) begin : bram_ip +`ifdef BRAM_IP + bram_icache_tag_ram u_tag_bram ( + .clka (clk), + .clkb (clk), + .ena (tag_bram_en[i][0]), + .enb (tag_bram_en[i][1]), + .wea (tag_bram_we[i][0]), + .web (tag_bram_we[i][1]), + .dina (tag_bram_wdata[i][0]), + .addra(tag_bram_addr[i][0]), + .douta(tag_bram_rdata[i][0]), + .dinb (tag_bram_wdata[i][1]), + .addrb(tag_bram_addr[i][1]), + .doutb(tag_bram_rdata[i][1]) + ); + bram_icache_data_ram u_data_bram ( + .clka (clk), + .clkb (clk), + .ena (data_bram_en[i][0]), + .enb (data_bram_en[i][1]), + .wea (data_bram_we[i][0]), + .web (data_bram_we[i][1]), + .dina (data_bram_wdata[i][0]), + .addra(data_bram_addr[i][0]), +
.douta(data_bram_rdata[i][0]), + .dinb (data_bram_wdata[i][1]), + .addrb(data_bram_addr[i][1]), + .doutb(data_bram_rdata[i][1]) + ); +`else + + bram #( + .DATA_WIDTH (TAG_BRAM_WIDTH), + .DATA_DEPTH_EXP2($clog2(NSET)) + ) u_tag_bram ( + .clk (clk), + .ena (tag_bram_en[i][0]), + .enb (tag_bram_en[i][1]), + .wea (tag_bram_we[i][0]), + .web (tag_bram_we[i][1]), + .dina (tag_bram_wdata[i][0]), + .addra(tag_bram_addr[i][0]), + .douta(tag_bram_rdata[i][0]), + .dinb (tag_bram_wdata[i][1]), + .addrb(tag_bram_addr[i][1]), + .doutb(tag_bram_rdata[i][1]) + ); + bram #( + .DATA_WIDTH (ICACHELINE_WIDTH), + .DATA_DEPTH_EXP2($clog2(NSET)) + ) u_data_bram ( + .clk (clk), + .ena (data_bram_en[i][0]), + .enb (data_bram_en[i][1]), + .wea (data_bram_we[i][0]), + .web (data_bram_we[i][1]), + .dina (data_bram_wdata[i][0]), + .addra(data_bram_addr[i][0]), + .douta(data_bram_rdata[i][0]), + .dinb (data_bram_wdata[i][1]), + .addrb(data_bram_addr[i][1]), + .doutb(data_bram_rdata[i][1]) + ); +`endif end - end + endgenerate endmodule diff --git a/src/vsrc/instr_buffer.sv b/src/vsrc/instr_buffer.sv index 3630baf..d5e2f49 100644 --- a/src/vsrc/instr_buffer.sv +++ b/src/vsrc/instr_buffer.sv @@ -5,7 +5,7 @@ module instr_buffer #( parameter IF_WIDTH = 2, parameter ID_WIDTH = 2, - parameter BUFFER_SIZE = 8 + parameter BUFFER_SIZE = 16 ) ( input logic clk, input logic rst, diff --git a/src/vsrc/pipeline/1_decode/id.sv b/src/vsrc/pipeline/1_decode/id.sv index a9d1180..3c49ad8 100644 --- a/src/vsrc/pipeline/1_decode/id.sv +++ b/src/vsrc/pipeline/1_decode/id.sv @@ -47,7 +47,7 @@ module id assign is_last_in_block = instr_buffer_i.valid ? 
instr_buffer_i.is_last_in_block : 0; // Exception info - logic excp; + logic excp, excp_nop; logic excp_ine; logic excp_ipe; logic [8:0] excp_num; // IPE, INE, BREAK, SYSCALL, {4 frontend excp}, INT @@ -222,15 +222,15 @@ module id // 只要是 IB 输入的指令,那么一律认为是有效的 // 如果在 ID 级发生了异常或在此之前就有异常,那么全部认为是 NOP, 但是是有效指令,以便进行异常处理 assign dispatch_o.instr_info.valid = instr_buffer_i.valid; - assign dispatch_o.use_imm = instr_valid ? instr_use_imm : 0; - assign dispatch_o.imm = instr_valid ? instr_imm : 0; - assign dispatch_o.aluop = instr_valid ? instr_aluop : 0; - assign dispatch_o.alusel = instr_valid ? instr_alusel : 0; - assign dispatch_o.reg_write_valid = instr_valid ? instr_reg_write_valid : 0; - assign dispatch_o.reg_write_addr = instr_valid ? instr_reg_write_addr : 0; + assign dispatch_o.use_imm = excp_nop ? 0 : instr_use_imm; + assign dispatch_o.imm = excp_nop ? 0 : instr_imm; + assign dispatch_o.aluop = excp_nop ? 0 : instr_aluop; + assign dispatch_o.alusel = excp_nop ? 0 : instr_alusel; + assign dispatch_o.reg_write_valid = excp_nop ? 0 : instr_reg_write_valid; + assign dispatch_o.reg_write_addr = excp_nop ? 0 : instr_reg_write_addr; // Generate output to Regfile - assign dispatch_o.reg_read_valid = instr_valid ? instr_reg_read_valid : 0; - assign dispatch_o.reg_read_addr = instr_valid ? instr_reg_read_addr : 0; + assign dispatch_o.reg_read_valid = excp_nop ? 0 : instr_reg_read_valid; + assign dispatch_o.reg_read_addr = excp_nop ? 
0 : instr_reg_read_addr; // Generate instr info pack assign dispatch_o.instr_info.pc = pc_i; assign dispatch_o.instr_info.instr = inst_i; @@ -245,6 +245,7 @@ module id assign excp_ine = ~instr_valid & instr_buffer_i.valid; // If IB input is valid, but no valid decode result, then INE is triggered assign excp_ipe = kernel_instr && (csr_plv == 2'b11); + assign excp_nop = excp_ipe | instr_buffer_i.excp | excp_ine; assign excp = excp_ipe | instr_syscall | instr_break | instr_buffer_i.excp | excp_ine | has_int; assign excp_num = { excp_ipe, excp_ine, instr_break, instr_syscall, instr_buffer_i.excp_num, has_int diff --git a/src/vsrc/pipeline/3_execution/ex.sv b/src/vsrc/pipeline/3_execution/ex.sv index d989708..6b823c9 100644 --- a/src/vsrc/pipeline/3_execution/ex.sv +++ b/src/vsrc/pipeline/3_execution/ex.sv @@ -40,8 +40,9 @@ module ex output ex_dispatch_struct ex_data_forward, - // -> Cache + // <-> Cache output logic icacop_op_en, + input logic icacop_op_ack_i, output logic dcacop_op_en, output logic [1:0] cacop_op_mode, @@ -124,9 +125,9 @@ module ex aluop_i == `EXE_RDCNTVH_OP ? 
timer_64[63:32] : dispatch_i.csr_reg_data; - + //cache ins - logic cacop_instr,icacop_inst,dcacop_inst; + logic cacop_instr, icacop_inst, dcacop_inst; logic [4:0] cacop_op; assign cacop_op = inst_i[4:0]; assign cacop_instr = aluop_i == `EXE_CACOP_OP; @@ -279,7 +280,8 @@ module ex ); - assign stallreq = muldiv_op & ~muldiv_finished; + assign stallreq = (muldiv_op & ~muldiv_finished) | // Multiply & Division + (icacop_inst & ~icacop_op_ack_i); // CACOP assign tlb_stallreq = aluop_i == `EXE_TLBRD_OP | aluop_i == `EXE_TLBSRCH_OP; always @(*) begin diff --git a/src/vsrc/utils/bram.sv b/src/vsrc/utils/bram.sv index d59eced..7fdd715 100644 --- a/src/vsrc/utils/bram.sv +++ b/src/vsrc/utils/bram.sv @@ -6,6 +6,8 @@ module bram #( parameter DATA_DEPTH_EXP2 = 8 ) ( input logic clk, + input logic ena, // Chip enable A + input logic enb, // Chip enable B input logic wea, // Write enable A input logic web, // Write enable B @@ -29,18 +31,23 @@ module bram #( // Read logic always_ff @(posedge clk) begin - douta <= data[addra]; - doutb <= data[addrb]; + if (ena & wea) douta <= dina; + else if (ena) douta <= data[addra]; + else douta <= 0; + + if (enb & web) doutb <= dinb; + else if (enb) doutb <= data[addrb]; + else doutb <= 0; end // Write logic always_ff @(posedge clk) begin - if (web) begin + if (enb & web) begin data[addrb[DATA_DEPTH_EXP2-1:0]] <= dinb; end // A port has priority - if (wea) begin + if (ena & wea) begin data[addra[DATA_DEPTH_EXP2-1:0]] <= dina; end end