diff --git a/Bender.yml b/Bender.yml index e36916e6..288afd10 100644 --- a/Bender.yml +++ b/Bender.yml @@ -52,6 +52,8 @@ sources: - target: rtl files: # Level 0 + - src/midend/idma_mp_dist_midend.sv + - src/midend/idma_mp_split_midend.sv - src/midend/idma_nd_midend.sv - src/midend/idma_rt_midend.sv @@ -75,10 +77,12 @@ sources: files: # Level 0 - src/frontend/desc64/idma_desc64_synth_pkg.sv + - src/midend/idma_mp_midend_synth_pkg.sv - src/midend/idma_nd_midend_synth.sv - src/midend/idma_rt_midend_synth_pkg.sv # Level 1 - src/frontend/desc64/idma_desc64_synth.sv + - src/midend/idma_mp_midend_synth.sv - src/midend/idma_rt_midend_synth.sv # Testbenches diff --git a/idma.mk b/idma.mk index e6110cd0..19b8e932 100644 --- a/idma.mk +++ b/idma.mk @@ -246,6 +246,11 @@ IDMA_RTL_DOC_ALL += $(IDMA_DOC_FIG_DIR)/graph/idma_rt_midend_synth.png IDMA_RTL_DOC_ALL += $(IDMA_HTML_DIR)/idma_rt_midend_synth/index.html IDMA_PICKLE_ALL += $(IDMA_PICKLE_DIR)/idma_rt_midend_synth.sv +# Mempool midend +IDMA_RTL_DOC_ALL += $(IDMA_DOC_FIG_DIR)/graph/idma_mp_midend_synth.png +IDMA_RTL_DOC_ALL += $(IDMA_HTML_DIR)/idma_mp_midend_synth/index.html +IDMA_PICKLE_ALL += $(IDMA_PICKLE_DIR)/idma_mp_midend_synth.sv + # -------------- # QuestaSim @@ -267,7 +272,7 @@ endef $(IDMA_VSIM_DIR)/compile.tcl: $(IDMA_BENDER_FILES) $(IDMA_TB_ALL) $(IDMA_RTL_ALL) $(BENDER) update $(BENDER) checkout - $(call idma_generate_vsim, $@, -t sim -t test -t rtl -t asic,../../..) + $(call idma_generate_vsim, $@, -t sim -t test -t synth -t rtl -t asic,../../..) 
idma_sim_clean: rm -rf $(IDMA_VSIM_DIR)/compile.tcl diff --git a/jobs/jobs.json b/jobs/jobs.json index 17d31b67..b978507a 100644 --- a/jobs/jobs.json +++ b/jobs/jobs.json @@ -138,7 +138,7 @@ }, "params" : { }, - "proc_id" : "rw_axi", + "proc_id" : "none", "testbench" : "tb_idma_desc64_top", "synth_top" : "idma_desc64_synth" }, @@ -148,7 +148,7 @@ }, "params" : { }, - "proc_id" : "rw_axi", + "proc_id" : "none", "testbench" : "tb_idma_desc64_bench", "synth_top" : "idma_desc64_synth" }, @@ -158,8 +158,18 @@ }, "params" : { }, - "proc_id" : "rw_axi", + "proc_id" : "none", "testbench" : "tb_idma_rt_midend", "synth_top" : "idma_rt_midend_synth" + }, + "mp_midend": { + "jobs" : { + "simple" : "jobs.json" + }, + "params" : { + }, + "proc_id" : "none", + "testbench" : "idma_mp_midend_synth", + "synth_top" : "idma_mp_midend_synth" } } diff --git a/src/midend/idma_mp_dist_midend.sv b/src/midend/idma_mp_dist_midend.sv new file mode 100644 index 00000000..f746314b --- /dev/null +++ b/src/midend/idma_mp_dist_midend.sv @@ -0,0 +1,197 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51
+
+// Authors:
+// - Samuel Riedel
+// - Thomas Benz
+
+`include "common_cells/registers.svh"
+
+/// Distribute DMA requests over several backends
+module idma_mp_dist_midend #(
+  /// Number of back-ends
+  parameter int unsigned NumBEs = 32'd1,
+  /// Size of the region that one port covers in bytes
+  parameter int unsigned RegionWidth = 32'd1,
+  /// Base address of the regions
+  parameter int unsigned RegionStart = 32'h0000_0000,
+  /// End address of the regions
+  parameter int unsigned RegionEnd = 32'h1000_0000,
+  /// Address Width
+  parameter int unsigned AddrWidth = 32'd32,
+  /// Print information on transfers
+  parameter bit PrintInfo = 1'b0,
+  /// iDMA request type
+  parameter type idma_req_t = logic,
+  /// iDMA response type
+  parameter type idma_rsp_t = logic
+) (
+  /// Clock
+  input logic clk_i,
+  /// Reset
+  input logic rst_ni,
+  /// Burst request manager
+  input idma_req_t idma_req_i,
+  /// iDMA request valid manager
+  input logic idma_req_valid_i,
+  /// iDMA request ready manager
+  output logic idma_req_ready_o,
+  /// iDMA response manager
+  output idma_rsp_t idma_rsp_o,
+  /// iDMA response valid manager
+  output logic idma_rsp_valid_o,
+  /// iDMA response ready manager
+  input logic idma_rsp_ready_i,
+  /// DMA busy manager
+  output idma_pkg::idma_busy_t idma_busy_o,
+  // Subordinate Port
+  /// iDMA request subordinate
+  output idma_req_t [NumBEs-1:0] idma_req_o,
+  /// iDMA request valid subordinate
+  output logic [NumBEs-1:0] idma_req_valid_o,
+  /// iDMA request ready subordinate
+  input logic [NumBEs-1:0] idma_req_ready_i,
+  /// iDMA response subordinate
+  input idma_rsp_t [NumBEs-1:0] idma_rsp_i,
+  /// iDMA response valid subordinate
+  input logic [NumBEs-1:0] idma_rsp_valid_i,
+  /// iDMA response ready subordinate
+  output logic [NumBEs-1:0] idma_rsp_ready_o,
+  /// DMA busy subordinate
+  input idma_pkg::idma_busy_t [NumBEs-1:0] idma_busy_i
+);
+
+  localparam DmaRegionAddressBits = $clog2(RegionWidth);
+  localparam FullRegionAddressBits = $clog2(RegionWidth*NumBEs);
+
+  typedef logic [FullRegionAddressBits:0] full_addr_t;
+
+  // Per-back-end bookkeeping: completion flags, injected completions, registered busy flags
+  logic [NumBEs-1:0] trans_complete_d, trans_complete_q;
+  logic [NumBEs-1:0] tie_off_trans_complete_d, tie_off_trans_complete_q;
+  idma_pkg::idma_busy_t [NumBEs-1:0] backend_busy_d, backend_busy_q;
+
+  // Respond upstream once every back-end has flagged completion; broadcast the response ready
+  assign idma_rsp_valid_o = &trans_complete_q;
+  assign idma_rsp_o = |idma_rsp_i; // NOTE(review): reduces all response bits to 1 bit - confirm
+  assign idma_rsp_ready_o = idma_rsp_ready_i ? '1 : '0;
+
+  // TODO We could have multiple outstanding requests per port, so we need multiple trans_complete_tie_offs
+  always_comb begin : proc_handle_status
+    trans_complete_d = trans_complete_q;
+    idma_busy_o = '0; // busy as soon as any back-end is busy: field-wise OR of the registered flags
+    for (int unsigned i = 0; i < NumBEs; i++) begin
+      trans_complete_d[i] = trans_complete_q[i] | idma_rsp_valid_i[i]| tie_off_trans_complete_q[i];
+      backend_busy_d[i] = idma_busy_i[i];
+      idma_busy_o |= backend_busy_q[i];
+    end
+    if (idma_rsp_valid_o) begin
+      trans_complete_d = '0;
+    end
+  end
+  `FF(trans_complete_q, trans_complete_d, '0, clk_i, rst_ni)
+  `FF(tie_off_trans_complete_q, tie_off_trans_complete_d, '0, clk_i, rst_ni)
+  `FF(backend_busy_q, backend_busy_d, '0, clk_i, rst_ni)
+
+  // Fork the single upstream request handshake into one handshake per back-end
+  logic [NumBEs-1:0] valid, ready;
+  stream_fork #(
+    .N_OUP (NumBEs)
+  ) i_stream_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i ( idma_req_valid_i ),
+    .ready_o ( idma_req_ready_o ),
+    .valid_o ( valid ),
+    .ready_i ( ready )
+  );
+
+  full_addr_t src_addr, dst_addr, start_addr, end_addr;
+
+  assign src_addr = idma_req_i.src_addr[FullRegionAddressBits-1:0];
+  assign dst_addr = idma_req_i.dst_addr[FullRegionAddressBits-1:0];
+
+  always_comb begin : proc_split
+    if (($unsigned(idma_req_i.src_addr) >= RegionStart) &&
+        ($unsigned(idma_req_i.src_addr) < RegionEnd )) begin
+      start_addr = src_addr;
+    end else begin
+      start_addr = dst_addr;
+    end
+    end_addr = start_addr + idma_req_i.length;
+    // Connect valid ready by default
+    idma_req_valid_o = valid;
+    ready = idma_req_ready_i;
+    // Do not interfere with metadata per default
+    tie_off_trans_complete_d = '0;
+
+    for (int i = 0; i < NumBEs; i++) begin
+      // Feed metadata through directly
+      idma_req_o[i] = idma_req_i;
+      // Feed through the address bits
+      idma_req_o[i].src_addr = idma_req_i.src_addr;
+      idma_req_o[i].dst_addr = idma_req_i.dst_addr;
+      // Modify lower addresses bits and size
+      if (($unsigned(start_addr) >= (i+1)*RegionWidth) ||
+          ($unsigned(end_addr) <= i*RegionWidth )) begin
+        // We are not involved in the transfer
+        idma_req_o[i].src_addr = '0;
+        idma_req_o[i].dst_addr = '0;
+        idma_req_o[i].length = 1;
+        // Make handshake ourselves
+        idma_req_valid_o[i] = 1'b0;
+        ready[i] = 1'b1;
+        // Inject trans complete
+        if (valid) begin
+          tie_off_trans_complete_d[i] = 1'b1;
+        end
+      end else if (($unsigned(start_addr) >= i*RegionWidth)) begin
+        // First (and potentially only) slice
+        // Leave address as is
+        if ($unsigned(end_addr) <= (i+1)*RegionWidth) begin
+          idma_req_o[i].length = idma_req_i.length;
+        end else begin
+          idma_req_o[i].length = RegionWidth - start_addr[DmaRegionAddressBits-1:0];
+        end
+      end else begin
+        // Round up the address to the next DMA boundary
+        if (($unsigned(idma_req_i.src_addr) >= RegionStart) &&
+            ($unsigned(idma_req_i.src_addr) < RegionEnd )) begin
+          idma_req_o[i].src_addr[FullRegionAddressBits-1:0] = i*RegionWidth;
+          idma_req_o[i].dst_addr = idma_req_i.dst_addr + i*RegionWidth -
+                                   start_addr[DmaRegionAddressBits-1:0];
+        end else begin
+          idma_req_o[i].src_addr = idma_req_i.src_addr + i*RegionWidth -
+                                   start_addr[DmaRegionAddressBits-1:0];
+          idma_req_o[i].dst_addr[FullRegionAddressBits-1:0] = i*RegionWidth;
+        end
+        if ($unsigned(end_addr) >= (i+1)*RegionWidth) begin
+          // Middle slice
+          // Emit a full-sized transfer
+          idma_req_o[i].length = RegionWidth;
+        end else begin
+          // Last slice
+          idma_req_o[i].length = end_addr[DmaRegionAddressBits-1:0];
+        end
+      end
+    end
+  end
+
+  // pragma translate_off
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (PrintInfo) begin
+      if (rst_ni && idma_req_valid_i && idma_req_ready_o) begin
+        $display("[idma_distributed_midend] Got request");
+        $display("Request in: From: 0x%8x To: 0x%8x with size %d",
+                 idma_req_i.src_addr, idma_req_i.dst_addr, idma_req_i.length);
+        for (int i = 0; i < NumBEs; i++) begin
+          $display("Out %6d: From: 0x%8x To: 0x%8x with size %d",
+                   i, idma_req_o[i].src_addr, idma_req_o[i].dst_addr, idma_req_o[i].length);
+        end
+      end
+    end
+  end
+  // pragma translate_on
+
+endmodule
diff --git a/src/midend/idma_mp_midend_synth.sv b/src/midend/idma_mp_midend_synth.sv
new file mode 100644
index 00000000..c6ebcf2b
--- /dev/null
+++ b/src/midend/idma_mp_midend_synth.sv
@@ -0,0 +1,125 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+
+// Authors:
+// - Thomas Benz
+
+`include "common_cells/registers.svh"
+
+/// Synthesis wrapper for the Mempool mid-ends
+module idma_mp_midend_synth #(
+  /// Number of back-ends
+  parameter int unsigned NumBEs = idma_mp_midend_synth_pkg::NumBEs,
+  /// Size of the region that one port covers in bytes
+  parameter int unsigned RegionWidth = idma_mp_midend_synth_pkg::RegionWidth,
+  /// Base address of the regions
+  parameter int unsigned RegionStart = idma_mp_midend_synth_pkg::RegionStart,
+  /// End address of the regions
+  parameter int unsigned RegionEnd = idma_mp_midend_synth_pkg::RegionEnd,
+  /// Address Width
+  parameter int unsigned AddrWidth = 32'd32,
+  /// Print information on transfers
+  parameter bit PrintInfo = 1'b0,
+  /// iDMA request type
+  parameter type idma_req_t = idma_mp_midend_synth_pkg::idma_req_t,
+  /// iDMA response type
+  parameter type idma_rsp_t = idma_mp_midend_synth_pkg::idma_rsp_t
+) (
+  /// Clock
+  input logic clk_i,
+  /// Reset
+  input logic rst_ni,
+  /// Burst request manager
+  input idma_req_t idma_req_i,
+  /// iDMA request valid manager
+  input logic idma_req_valid_i,
+  /// iDMA request ready manager
+  output logic idma_req_ready_o,
+  /// iDMA response manager
+  output idma_rsp_t idma_rsp_o,
+  /// iDMA response valid manager
+  output logic idma_rsp_valid_o,
+  /// iDMA response ready manager
+  input logic idma_rsp_ready_i,
+  /// DMA busy manager
+  output idma_pkg::idma_busy_t idma_busy_o,
+  // Subordinate Port
+  /// iDMA request subordinate
+  output idma_req_t [NumBEs-1:0] idma_req_o,
+  /// iDMA request valid subordinate
+  output logic [NumBEs-1:0] idma_req_valid_o,
+  /// iDMA request ready subordinate
+  input logic [NumBEs-1:0] idma_req_ready_i,
+  /// iDMA response subordinate
+  input idma_rsp_t [NumBEs-1:0] idma_rsp_i,
+  /// iDMA response valid subordinate
+  input logic [NumBEs-1:0] idma_rsp_valid_i,
+  /// iDMA response ready subordinate
+  output logic [NumBEs-1:0] idma_rsp_ready_o,
+  /// DMA busy subordinate
+  input idma_pkg::idma_busy_t [NumBEs-1:0] idma_busy_i
+);
+
+
+  idma_req_t idma_req;
+  logic idma_req_valid;
+  logic idma_req_ready;
+  idma_rsp_t idma_rsp;
+  logic idma_rsp_valid;
+  logic idma_rsp_ready;
+
+  idma_mp_split_midend #(
+    .RegionWidth ( RegionWidth ),
+    .RegionStart ( RegionStart ),
+    .RegionEnd ( RegionEnd ),
+    .AddrWidth ( AddrWidth ),
+    .PrintInfo ( PrintInfo ),
+    .idma_req_t ( idma_req_t ),
+    .idma_rsp_t ( idma_rsp_t )
+  ) i_idma_mp_split_midend (
+    .clk_i,
+    .rst_ni,
+    .idma_req_i,
+    .idma_req_valid_i,
+    .idma_req_ready_o,
+    .idma_rsp_o,
+    .idma_rsp_valid_o,
+    .idma_rsp_ready_i,
+    .idma_req_o ( idma_req ),
+    .idma_req_valid_o ( idma_req_valid ),
+    .idma_req_ready_i ( idma_req_ready ),
+    .idma_rsp_i ( idma_rsp ),
+    .idma_rsp_valid_i ( idma_rsp_valid ),
+    .idma_rsp_ready_o ( idma_rsp_ready )
+  );
+
+  idma_mp_dist_midend #(
+    .NumBEs ( NumBEs ),
+    .RegionWidth ( RegionWidth ),
+    .RegionStart ( RegionStart ),
+    .RegionEnd ( RegionEnd ),
+    .AddrWidth ( AddrWidth ),
+    .PrintInfo ( PrintInfo ),
+    .idma_req_t ( idma_req_t ),
+    .idma_rsp_t ( idma_rsp_t )
+  ) i_idma_mp_dist_midend (
+    .clk_i,
+    .rst_ni,
+    .idma_req_i ( idma_req ),
+    .idma_req_valid_i ( idma_req_valid ),
+    .idma_req_ready_o ( idma_req_ready ),
+    .idma_rsp_o ( idma_rsp ),
+    .idma_rsp_valid_o ( idma_rsp_valid ),
+    .idma_rsp_ready_i ( idma_rsp_ready ),
+    .idma_busy_o,
+    .idma_req_o,
+    .idma_req_valid_o,
+    .idma_req_ready_i,
+    .idma_rsp_i,
+    .idma_rsp_valid_i,
+    .idma_rsp_ready_o,
+    .idma_busy_i
+  );
+
+endmodule
diff --git a/src/midend/idma_mp_midend_synth_pkg.sv b/src/midend/idma_mp_midend_synth_pkg.sv
new file mode 100644
index 00000000..09aca54a
--- /dev/null
+++ b/src/midend/idma_mp_midend_synth_pkg.sv
@@ -0,0 +1,25 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+
+// Authors:
+// - Thomas Benz
+
+`include "idma/typedef.svh"
+
+/// Synthesis package for the Mempool midend
+package idma_mp_midend_synth_pkg;
+
+  localparam int unsigned NumBEs = 32'd8;
+  localparam int unsigned RegionWidth = 32'h0001_0000;
+  localparam int unsigned RegionStart = 32'h0000_0000;
+  localparam int unsigned RegionEnd = 32'h1000_0000;
+
+  typedef logic [5:0] axi_id_t;
+  typedef logic [31:0] tf_len_t;
+  typedef logic [31:0] axi_addr_t;
+
+  `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, axi_id_t, axi_addr_t, tf_len_t)
+  `IDMA_TYPEDEF_FULL_RSP_T(idma_rsp_t, axi_addr_t)
+
+endpackage
diff --git a/src/midend/idma_mp_split_midend.sv b/src/midend/idma_mp_split_midend.sv
new file mode 100644
index 00000000..6375329d
--- /dev/null
+++ b/src/midend/idma_mp_split_midend.sv
@@ -0,0 +1,185 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+
+// Authors:
+// - Samuel Riedel
+// - Thomas Benz
+
+`include "common_cells/registers.svh"
+
+/// Splits DMA transactions along the given region boundaries
+module idma_mp_split_midend #(
+  /// Size of the region that one port covers in bytes
+  parameter int unsigned RegionWidth = 32'd1,
+  /// Base address of the regions
+  parameter int unsigned RegionStart = 32'h0000_0000,
+  /// End address of the regions
+  parameter int unsigned RegionEnd = 32'h1000_0000,
+  /// Address Width
+  parameter int unsigned AddrWidth = 32'd32,
+  /// Print information on transfers
+  parameter bit PrintInfo = 1'b0,
+  /// iDMA request type
+  parameter type idma_req_t = logic,
+  /// iDMA response type
+  parameter type idma_rsp_t = logic
+) (
+  /// Clock
+  input logic clk_i,
+  /// Reset
+  input logic rst_ni,
+  /// Burst request manager
+  input idma_req_t idma_req_i,
+  /// iDMA request valid manager
+  input logic idma_req_valid_i,
+  /// iDMA request ready manager
+  output logic idma_req_ready_o,
+  /// iDMA response manager
+  output idma_rsp_t idma_rsp_o,
+  /// iDMA response valid manager
+  output logic idma_rsp_valid_o,
+  /// iDMA response ready manager
+  input logic idma_rsp_ready_i,
+  // Subordinate Port
+  /// iDMA request subordinate
+  output idma_req_t idma_req_o,
+  /// iDMA request valid subordinate
+  output logic idma_req_valid_o,
+  /// iDMA request ready subordinate
+  input logic idma_req_ready_i,
+  /// iDMA response subordinate
+  input idma_rsp_t idma_rsp_i,
+  /// iDMA response valid subordinate
+  input logic idma_rsp_valid_i,
+  /// iDMA response ready subordinate
+  output logic idma_rsp_ready_o
+);
+
+  /// Width of the address regions
+  localparam DmaRegionAddressBits = $clog2(RegionWidth);
+
+  /// Address type
+  typedef logic [AddrWidth-1:0] addr_t;
+
+  addr_t start_addr, end_addr;
+  logic req_valid;
+
+  // Bypass the response port
+  assign idma_rsp_o = idma_rsp_i;
+  assign idma_rsp_ready_o = idma_rsp_ready_i;
+
+  // Handle Metadata
+  // Count the slices in flight; answer upstream when the last outstanding slice completes
+  logic [31:0] num_trans_d, num_trans_q;
+
+  always_comb begin : proc_handle_meta
+    num_trans_d = num_trans_q;
+    idma_rsp_valid_o = 1'b0;
+    if (req_valid) begin
+      num_trans_d += 1;
+    end
+    if (idma_rsp_valid_i & idma_rsp_ready_o) begin // downstream response handshake
+      num_trans_d -= 1;
+    end
+    if (num_trans_q == 1 && num_trans_d == 0) begin
+      idma_rsp_valid_o = 1'b1;
+    end
+  end
+  `FF(num_trans_q, num_trans_d, '0, clk_i, rst_ni)
+
+  // Pick the address that lies inside the region window: source if in range, else destination
+  always_comb begin : proc_
+    if (($unsigned(idma_req_i.src_addr) >= RegionStart) &&
+        ($unsigned(idma_req_i.src_addr) < RegionEnd )) begin
+      start_addr = idma_req_i.src_addr;
+    end else begin
+      start_addr = idma_req_i.dst_addr;
+    end
+    end_addr = start_addr + idma_req_i.length;
+  end
+
+  enum logic {Idle, Busy} state_d, state_q;
+  idma_req_t req_d, req_q;
+
+  `FFARN(state_q, state_d, Idle, clk_i, rst_ni)
+  `FFARN(req_q, req_d, '0, clk_i, rst_ni)
+
+  always_comb begin : proc_splitting
+    // defaults
+    state_d = state_q;
+    req_d = req_q;
+    idma_req_o = idma_req_i;
+    idma_req_valid_o = 1'b0;
+    idma_req_ready_o = 1'b0;
+    req_valid = 1'b0;
+
+    unique case (state_q)
+      Idle: begin
+        if (idma_req_valid_i) begin
+          if (RegionWidth-start_addr[DmaRegionAddressBits-1:0] >= idma_req_i.length) begin
+            // No splitting required, just forward
+            idma_req_valid_o = 1'b1;
+            idma_req_ready_o = idma_req_ready_i;
+            req_valid = idma_req_ready_i; // count the request only when it is accepted
+          end else begin
+            // Splitting required
+            // Feed through the first request and modify its size
+            idma_req_o.length = RegionWidth - start_addr[DmaRegionAddressBits-1:0];
+            // Forward request; acknowledge upstream only once the first slice is accepted,
+            // otherwise a stalled first slice would be dropped
+            idma_req_valid_o = 1'b1;
+            idma_req_ready_o = idma_req_ready_i;
+            if (idma_req_ready_i) begin
+              // Store the remainder: increment the addresses and reduce the length
+              req_d = idma_req_i;
+              req_d.length -= RegionWidth - start_addr[DmaRegionAddressBits-1:0];
+              req_d.src_addr += RegionWidth - start_addr[DmaRegionAddressBits-1:0];
+              req_d.dst_addr += RegionWidth - start_addr[DmaRegionAddressBits-1:0];
+              req_valid = 1'b1;
+              state_d = Busy;
+            end
+          end
+        end
+      end
+      Busy: begin
+        // Send next burst from split.
+        idma_req_o = req_q;
+        idma_req_valid_o = 1'b1;
+        req_valid = idma_req_ready_i;
+        if (req_q.length <= RegionWidth) begin
+          // Last split
+          if (idma_req_ready_i) begin
+            state_d = Idle;
+          end
+        end else begin
+          // Clip size and increment address
+          idma_req_o.length = RegionWidth;
+          if (idma_req_ready_i) begin
+            req_d.length = req_q.length - RegionWidth;
+            req_d.src_addr = req_q.src_addr + RegionWidth;
+            req_d.dst_addr = req_q.dst_addr + RegionWidth;
+          end
+        end
+      end
+      default: /*do nothing*/;
+    endcase
+  end
+
+  // pragma translate_off
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (PrintInfo) begin
+      if (rst_ni && idma_req_valid_i && idma_req_ready_o) begin
+        $display("[idma_split_midend] Got request");
+        $display("Split: Request in: From: 0x%8x To: 0x%8x with size %d",
+                 idma_req_i.src_addr, idma_req_i.dst_addr, idma_req_i.length);
+      end
+      if (rst_ni && idma_req_valid_o && idma_req_ready_i) begin
+        $display("Split: Out %6d: From: 0x%8x To: 0x%8x with size %d",
+                 num_trans_q, idma_req_o.src_addr, idma_req_o.dst_addr, idma_req_o.length);
+      end
+    end
+  end
+  // pragma translate_on
+
+endmodule