From d2eeeeed1449c270ee545bedfb8ecd8c787d55c1 Mon Sep 17 00:00:00 2001 From: Thomas Benz Date: Thu, 11 Aug 2022 23:23:04 +0200 Subject: [PATCH] Create a dataflow-oriented descriptor-based iDMA frontend supporting prefetching. - tracer: Add WIP version of the iDMA tracer (#9), add some fixes (#14) - frontends/desc64: Transition Regbus master to AXI master - jobs.json: Add descriptor-based testbench to the job file - frontends/desc64: Update synth module for descriptor frontend - test/frontends: Add testbench for benchmarking (***caution: not to be used as VIP***) - frontends/desc64: Remove unused shared counter - Makefile: Quote paths to handle spaces in paths - frontends/desc64: Add prefetching design --- .gitlab-ci.yml | 10 + Bender.yml | 10 +- Makefile | 60 +- jobs.json | 12 + scripts/frontend/bench.tcl | 24 + scripts/frontend/run-no-chain.tcl | 22 + scripts/frontend/run-one.tcl | 21 + scripts/frontend/run-only-chain.tcl | 23 + scripts/frontend/run.tcl | 18 + scripts/waves/vsim_fe_desc64.do | 17 + src/frontends/desc64/idma_desc64_ar_gen.sv | 132 +++ .../desc64/idma_desc64_ar_gen_prefetch.sv | 308 ++++++ src/frontends/desc64/idma_desc64_reader.sv | 177 ++++ .../desc64/idma_desc64_reader_gater.sv | 55 ++ .../desc64/idma_desc64_reg_wrapper.sv | 40 +- src/frontends/desc64/idma_desc64_reshaper.sv | 62 ++ .../desc64/idma_desc64_shared_counter.sv | 57 -- src/frontends/desc64/idma_desc64_synth.sv | 96 +- src/frontends/desc64/idma_desc64_synth_pkg.sv | 36 +- src/frontends/desc64/idma_desc64_top.sv | 912 ++++++++---------- src/include/idma/tracer.svh | 101 ++ src/systems/cva6_desc/dma_desc_synth.sv | 59 ++ src/systems/cva6_desc/dma_desc_synth_pkg.sv | 22 + src/systems/cva6_desc/dma_desc_wrap.sv | 227 +++-- test/frontends/tb_idma_desc64_bench.sv | 910 +++++++++++++++++ test/frontends/tb_idma_desc64_top.sv | 468 +++++---- test/tb_idma_backend.sv | 19 +- test/tb_idma_nd_backend.sv | 19 +- util/trace.py | 73 ++ verilator/scripts/preprocess.py | 4 +- 30 files changed, 3078 
insertions(+), 916 deletions(-) create mode 100644 scripts/frontend/bench.tcl create mode 100644 scripts/frontend/run-no-chain.tcl create mode 100644 scripts/frontend/run-one.tcl create mode 100644 scripts/frontend/run-only-chain.tcl create mode 100644 scripts/frontend/run.tcl create mode 100644 scripts/waves/vsim_fe_desc64.do create mode 100644 src/frontends/desc64/idma_desc64_ar_gen.sv create mode 100644 src/frontends/desc64/idma_desc64_ar_gen_prefetch.sv create mode 100644 src/frontends/desc64/idma_desc64_reader.sv create mode 100644 src/frontends/desc64/idma_desc64_reader_gater.sv create mode 100644 src/frontends/desc64/idma_desc64_reshaper.sv delete mode 100644 src/frontends/desc64/idma_desc64_shared_counter.sv create mode 100644 src/include/idma/tracer.svh create mode 100644 src/systems/cva6_desc/dma_desc_synth.sv create mode 100644 src/systems/cva6_desc/dma_desc_synth_pkg.sv create mode 100644 test/frontends/tb_idma_desc64_bench.sv create mode 100644 util/trace.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b7dc184e..e6903979 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -71,3 +71,13 @@ tiny-dma-run: job: prepare-non-free strategy: depend +frontend-descriptor-run: + stage: iDMA + needs: + - prepare-non-free + trigger: + include: + - artifact: idma-non-free/ci/gitlab-frontend-descriptor-ci.yml + job: prepare-non-free + strategy: depend + diff --git a/Bender.yml b/Bender.yml index 610786d6..c2400435 100644 --- a/Bender.yml +++ b/Bender.yml @@ -61,13 +61,20 @@ sources: - files: # 64bit descriptor frontend # Level 0 + - src/frontends/desc64/idma_desc64_ar_gen.sv + - src/frontends/desc64/idma_desc64_ar_gen_prefetch.sv + - src/frontends/desc64/idma_desc64_reader.sv + - src/frontends/desc64/idma_desc64_reader_gater.sv - src/frontends/desc64/idma_desc64_reg_pkg.sv - src/frontends/desc64/idma_desc64_reg_top.sv - - src/frontends/desc64/idma_desc64_shared_counter.sv + - src/frontends/desc64/idma_desc64_reshaper.sv # Level 1 - 
src/frontends/desc64/idma_desc64_reg_wrapper.sv # Level 2 - src/frontends/desc64/idma_desc64_top.sv + - src/systems/cva6_desc/dma_desc_wrap.sv + - src/systems/cva6_desc/dma_desc_synth_pkg.sv + - src/systems/cva6_desc/dma_desc_synth.sv # Systems - target: all(pulp, not(mchan)) @@ -99,3 +106,4 @@ sources: files: # Level 0 - test/frontends/tb_idma_desc64_top.sv + - test/frontends/tb_idma_desc64_bench.sv diff --git a/Makefile b/Makefile index 2479e7ae..cb441feb 100644 --- a/Makefile +++ b/Makefile @@ -291,39 +291,39 @@ REG_HTML_STRING = "\n\n\n $(REG32_2D_FE_DIR)/idma_reg32_2d_frontend.h - printf $(REG_HTML_STRING) > $(REG32_2D_FE_DIR)/idma_reg32_2d_frontend.html - $(PYTHON) $(REG_TOOL) $(REG32_2D_HJSON) -d >> $(REG32_2D_FE_DIR)/idma_reg32_2d_frontend.html - printf "\n" >> $(REG32_2D_FE_DIR)/idma_reg32_2d_frontend.html - cp $(REG_PATH)/vendor/lowrisc_opentitan/util/reggen/reg_html.css $(REG32_2D_FE_DIR) + "$(PYTHON)" "$(REG_TOOL)" "$(REG32_2D_HJSON)" -t "$(REG32_2D_FE_DIR)" -r + "$(PYTHON)" "$(REG_TOOL)" "$(REG32_2D_HJSON)" -D > "$(REG32_2D_FE_DIR)/idma_reg32_2d_frontend.h" + printf $(REG_HTML_STRING) > "$(REG32_2D_FE_DIR)/idma_reg32_2d_frontend.html" + "$(PYTHON)" "$(REG_TOOL)" "$(REG32_2D_HJSON)" -d >> "$(REG32_2D_FE_DIR)/idma_reg32_2d_frontend.html" + printf "\n" >> "$(REG32_2D_FE_DIR)/idma_reg32_2d_frontend.html" + cp "$(REG_PATH)/vendor/lowrisc_opentitan/util/reggen/reg_html.css" "$(REG32_2D_FE_DIR)" reg64_regs: - $(PYTHON) $(REG_TOOL) $(REG64_HJSON) -t $(REG64_FE_DIR) -r - $(PYTHON) $(REG_TOOL) $(REG64_HJSON) -D > $(REG64_FE_DIR)/idma_reg64_frontend.h - printf $(REG_HTML_STRING) > $(REG64_FE_DIR)/idma_reg64_frontend.html - $(PYTHON) $(REG_TOOL) $(REG64_HJSON) -d >> $(REG64_FE_DIR)/idma_reg64_frontend.html - printf "\n" >> $(REG64_FE_DIR)/idma_reg64_frontend.html - cp $(REG_PATH)/vendor/lowrisc_opentitan/util/reggen/reg_html.css $(REG64_FE_DIR) + "$(PYTHON)" "$(REG_TOOL)" "$(REG64_HJSON)" -t "$(REG64_FE_DIR)" -r + "$(PYTHON)" "$(REG_TOOL)" "$(REG64_HJSON)" -D > 
"$(REG64_FE_DIR)/idma_reg64_frontend.h" + printf $(REG_HTML_STRING) > "$(REG64_FE_DIR)/idma_reg64_frontend.html" + "$(PYTHON)" "$(REG_TOOL)" "$(REG64_HJSON)" -d >> "$(REG64_FE_DIR)/idma_reg64_frontend.html" + printf "\n" >> "$(REG64_FE_DIR)/idma_reg64_frontend.html" + cp "$(REG_PATH)/vendor/lowrisc_opentitan/util/reggen/reg_html.css" "$(REG64_FE_DIR)" desc64_regs: - $(PYTHON) $(REG_TOOL) $(DESC64_HJSON) -t $(DESC64_FE_DIR) -r - $(PYTHON) $(REG_TOOL) $(DESC64_HJSON) -D > $(DESC64_FE_DIR)/idma_desc64_frontend.h - printf $(REG_HTML_STRING) > $(DESC64_FE_DIR)/idma_desc64_frontend.html - $(PYTHON) $(REG_TOOL) $(DESC64_HJSON) -d >> $(DESC64_FE_DIR)/idma_desc64_frontend.html - printf "\n" >> $(DESC64_FE_DIR)/idma_desc64_frontend.html - cp $(REG_PATH)/vendor/lowrisc_opentitan/util/reggen/reg_html.css $(DESC64_FE_DIR) + "$(PYTHON)" "$(REG_TOOL)" "$(DESC64_HJSON)" -t "$(DESC64_FE_DIR)" -r + "$(PYTHON)" "$(REG_TOOL)" "$(DESC64_HJSON)" -D > "$(DESC64_FE_DIR)/idma_desc64_frontend.h" + printf $(REG_HTML_STRING) > "$(DESC64_FE_DIR)/idma_desc64_frontend.html" + "$(PYTHON)" "$(REG_TOOL)" "$(DESC64_HJSON)" -d >> "$(DESC64_FE_DIR)/idma_desc64_frontend.html" + printf "\n" >> "$(DESC64_FE_DIR)/idma_desc64_frontend.html" + cp "$(REG_PATH)/vendor/lowrisc_opentitan/util/reggen/reg_html.css" "$(DESC64_FE_DIR)" regs_clean: - rm -f $(REG32_2D_FE_DIR)/idma_reg32_2d_frontend.h - rm -f $(REG32_2D_FE_DIR)/idma_reg32_2d_frontend_reg_pkg.sv - rm -f $(REG32_2D_FE_DIR)/idma_reg32_2d_frontend_reg_top.sv - rm -f $(REG32_2D_FE_DIR)/reg_html.css - rm -f $(REG64_FE_DIR)/idma_reg64_frontend.h - rm -f $(REG64_FE_DIR)/idma_reg32_frontend_reg_pkg.sv - rm -f $(REG64_FE_DIR)/idma_reg32_frontend_reg_top.sv - rm -f $(REG64_FE_DIR)/reg_html.css - rm -f $(DESC64_FE_DIR)/idma_desc64_frontend.h - rm -f $(DESC64_FE_DIR)/idma_desc64_reg_pkg.sv - rm -f $(DESC64_FE_DIR)/idma_desc64_reg_top.sv - rm -f $(DESC64_FE_DIR)/reg_html.css + rm -f "$(REG32_2D_FE_DIR)/idma_reg32_2d_frontend.h" + rm -f 
"$(REG32_2D_FE_DIR)/idma_reg32_2d_frontend_reg_pkg.sv" + rm -f "$(REG32_2D_FE_DIR)/idma_reg32_2d_frontend_reg_top.sv" + rm -f "$(REG32_2D_FE_DIR)/reg_html.css" + rm -f "$(REG64_FE_DIR)/idma_reg64_frontend.h" + rm -f "$(REG64_FE_DIR)/idma_reg32_frontend_reg_pkg.sv" + rm -f "$(REG64_FE_DIR)/idma_reg32_frontend_reg_top.sv" + rm -f "$(REG64_FE_DIR)/reg_html.css" + rm -f "$(DESC64_FE_DIR)/idma_desc64_frontend.h" + rm -f "$(DESC64_FE_DIR)/idma_desc64_reg_pkg.sv" + rm -f "$(DESC64_FE_DIR)/idma_desc64_reg_top.sv" + rm -f "$(DESC64_FE_DIR)/reg_html.css" diff --git a/jobs.json b/jobs.json index cba7f4b6..3ad8c97e 100644 --- a/jobs.json +++ b/jobs.json @@ -79,5 +79,17 @@ "synth_top" : "idma_nd_backend_synth", "overrides" : { } + }, + "frontend-descriptor": { + "seed" : 1336, + "man_jobs" : { + "simple" : "/dev/null" + }, + "gen_jobs" : { + }, + "testbench" : "tb_idma_desc64_top", + "synth_top" : "idma_desc64_synth", + "overrides" : { + } } } diff --git a/scripts/frontend/bench.tcl b/scripts/frontend/bench.tcl new file mode 100644 index 00000000..5228d4ba --- /dev/null +++ b/scripts/frontend/bench.tcl @@ -0,0 +1,24 @@ +# Copyright 2022 ETH Zurich and University of Bologna. +# Solderpad Hardware License, Version 0.51, see LICENSE for details. +# SPDX-License-Identifier: SHL-0.51 + +# Axel Vanoni + +source scripts/compile_vsim.tcl +vsim tb_idma_desc64_bench -t 1ps \ + -GNumberOfTests=150 \ + -GChainedDescriptors=20 \ + -GSimulationTimeoutCycles=300000 \ + -GTransferLength=24 \ + -GDoIRQ=0 \ + +trace_file=trace-test.log \ + -voptargs=+acc +#-voptargs=-pedantic + +set StdArithNoWarnings 1 +set NumericStdNoWarnings 1 +log -r /* + +source scripts/waves/vsim_fe_desc64.do + +run -all diff --git a/scripts/frontend/run-no-chain.tcl b/scripts/frontend/run-no-chain.tcl new file mode 100644 index 00000000..8f5edb30 --- /dev/null +++ b/scripts/frontend/run-no-chain.tcl @@ -0,0 +1,22 @@ +# Copyright 2022 ETH Zurich and University of Bologna. 
+# Solderpad Hardware License, Version 0.51, see LICENSE for details. +# SPDX-License-Identifier: SHL-0.51 + +# Axel Vanoni + +# run frontend tests without chaining +source scripts/compile_vsim.tcl +vsim tb_idma_desc64_top -t 1ps \ + -GNumberOfTests=20 \ + -GMaxChainedDescriptors=1 \ + -GSimulationTimeoutCycles=2000 \ + -voptargs=+acc +#-voptargs=-pedantic + +set StdArithNoWarnings 1 +set NumericStdNoWarnings 1 +log -r /* + +source scripts/waves/vsim_fe_desc64.do + +run -all diff --git a/scripts/frontend/run-one.tcl b/scripts/frontend/run-one.tcl new file mode 100644 index 00000000..b7795cec --- /dev/null +++ b/scripts/frontend/run-one.tcl @@ -0,0 +1,21 @@ +# Copyright 2022 ETH Zurich and University of Bologna. +# Solderpad Hardware License, Version 0.51, see LICENSE for details. +# SPDX-License-Identifier: SHL-0.51 + +# Axel Vanoni + +# run frontend tests with one transfer +source scripts/compile_vsim.tcl +vsim tb_idma_desc64_top -t 1ps -GNumberOfTests=1 \ + -GSimulationTimeoutCycles=200 \ + -GMaxChainedDescriptors=1 \ + -voptargs=+acc +#-voptargs=-pedantic + +set StdArithNoWarnings 1 +set NumericStdNoWarnings 1 +log -r /* + +source scripts/waves/vsim_fe_desc64.do + +run -all diff --git a/scripts/frontend/run-only-chain.tcl b/scripts/frontend/run-only-chain.tcl new file mode 100644 index 00000000..a318f7a5 --- /dev/null +++ b/scripts/frontend/run-only-chain.tcl @@ -0,0 +1,23 @@ +# Copyright 2022 ETH Zurich and University of Bologna. +# Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+# SPDX-License-Identifier: SHL-0.51 + +# Axel Vanoni + +# run tests with only chaining +source scripts/compile_vsim.tcl +vsim tb_idma_desc64_top -t 1ps \ + -GMaxChainedDescriptors=100 \ + -GMinChainedDescriptors=100 \ + -GSimulationTimeoutCycles=2000 \ + -GNumberOfTests=1 \ + -voptargs=+acc +#-voptargs=-pedantic + +set StdArithNoWarnings 1 +set NumericStdNoWarnings 1 +log -r /* + +source scripts/waves/vsim_fe_desc64.do + +run -all diff --git a/scripts/frontend/run.tcl b/scripts/frontend/run.tcl new file mode 100644 index 00000000..c053d5fd --- /dev/null +++ b/scripts/frontend/run.tcl @@ -0,0 +1,18 @@ +# Copyright 2022 ETH Zurich and University of Bologna. +# Solderpad Hardware License, Version 0.51, see LICENSE for details. +# SPDX-License-Identifier: SHL-0.51 + +# Axel Vanoni + +# run frontend tests with default settings +source scripts/compile_vsim.tcl + +vsim tb_idma_desc64_top -t 1ps -voptargs=+acc +#-voptargs=-pedantic + +source scripts/waves/vsim_fe_desc64.do + +set StdArithNoWarnings 1 +set NumericStdNoWarnings 1 +log -r /* +run -all diff --git a/scripts/waves/vsim_fe_desc64.do b/scripts/waves/vsim_fe_desc64.do new file mode 100644 index 00000000..386c4978 --- /dev/null +++ b/scripts/waves/vsim_fe_desc64.do @@ -0,0 +1,17 @@ +onerror {resume} +quietly WaveActivateNextPane {} 0 +add wave -position end i_dut/clk_i +add wave -position end i_dut/rst_ni +add wave -position end i_dut/master_req_o +add wave -position end i_dut/master_rsp_i +add wave -position end i_dut/slave_req_i +add wave -position end i_dut/slave_rsp_o +add wave -position end i_dut/idma_req_o +add wave -position end i_dut/idma_req_ready_i +add wave -position end i_dut/idma_req_valid_o +add wave -position end i_dut/idma_rsp_ready_o +add wave -position end i_dut/idma_rsp_valid_i +add wave -position end i_dut/idma_busy_i +add wave -position end i_dut/irq_o + +quietly wave cursor active 1 diff --git a/src/frontends/desc64/idma_desc64_ar_gen.sv b/src/frontends/desc64/idma_desc64_ar_gen.sv new file 
mode 100644
index 00000000..e82da860
--- /dev/null
+++ b/src/frontends/desc64/idma_desc64_ar_gen.sv
@@ -0,0 +1,132 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+
+// Axel Vanoni
+
+`include "common_cells/assertions.svh"
+`include "common_cells/registers.svh"
+
+/// This module generates AR packets to fetch descriptors from memory, one descriptor at a time
+module idma_desc64_ar_gen #(
+    /// AXI Data width
+    parameter int unsigned DataWidth = 64,
+    /// Descriptor type. `$bits(descriptor_t)` must be a power of two
+    parameter type descriptor_t = logic,
+    /// AXI AR channel type
+    parameter type axi_ar_chan_t = logic,
+    /// AXI AR id type
+    parameter type axi_id_t = logic,
+    /// Type that can hold the usage information of the idma_req fifo
+    parameter type usage_t = logic,
+    /// AXI Address type
+    parameter type addr_t = logic
+)(
+    /// Clock
+    input  logic         clk_i,
+    /// Reset
+    input  logic         rst_ni,
+    /// AXI AR channel
+    output axi_ar_chan_t axi_ar_chan_o,
+    /// AXI AR valid
+    output logic         axi_ar_chan_valid_o,
+    /// AXI AR ready
+    input  logic         axi_ar_chan_ready_i,
+    /// AXI ID to use when requesting
+    input  axi_id_t      axi_ar_id_i,
+    /// queued address to use when we reach the last in a chain
+    input  addr_t        queued_address_i,
+    /// queued address valid
+    input  logic         queued_address_valid_i,
+    /// queued address ready
+    output logic         queued_address_ready_o,
+    /// next address as read from descriptor
+    input  addr_t        next_address_from_descriptor_i,
+    /// next address valid
+    input  logic         next_address_from_descriptor_valid_i,
+    /// number of available slots in the idma request fifo
+    input  usage_t       idma_req_available_slots_i,
+    /// address for feedback for the next request
+    output addr_t        feedback_addr_o,
+    /// feedback address valid
+    output logic         feedback_addr_valid_o,
+    /// whether the unit is busy
+    output logic         busy_o
+);
+
+`define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+localparam int unsigned DataWidthBytes = DataWidth / 8;
+localparam int unsigned DescriptorSize = $bits(descriptor_t) / 8;
+
+localparam logic [2:0] AxiSize = `MIN(`MIN($clog2(DataWidthBytes), $clog2(DescriptorSize)), 3'b111);
+// A descriptor no wider than the bus is a single beat (prevents the `0 - 1` wrap to 8'hFF)
+localparam logic [7:0] AxiLength = (DescriptorSize > DataWidthBytes) ? DescriptorSize / DataWidthBytes - 1 : 0;
+
+logic inflight_q, inflight_d;
+logic next_addr_from_desc_valid_q, next_addr_from_desc_valid_d;
+logic next_addr_from_desc_valid_this_cycle;
+logic take_from_queued;
+logic may_send_ar;
+addr_t next_addr_q, next_addr_d;
+addr_t ar_addr;
+
+assign next_addr_from_desc_valid_d = next_address_from_descriptor_valid_i;
+assign next_addr_from_desc_valid_this_cycle = !next_addr_from_desc_valid_q && // rising edge of valid
+    next_address_from_descriptor_valid_i;
+
+assign next_addr_d = next_addr_from_desc_valid_this_cycle ? // capture the next address when it arrives
+    next_address_from_descriptor_i :
+    next_addr_q;
+
+assign take_from_queued = (next_addr_from_desc_valid_this_cycle ? // all-ones next address: use the queue
+    next_address_from_descriptor_i == '1 :
+    next_addr_q == '1);
+
+assign ar_addr = take_from_queued ? queued_address_i :
+    (next_addr_from_desc_valid_this_cycle ?
+        next_address_from_descriptor_i : next_addr_q);
+
+assign may_send_ar = idma_req_available_slots_i > 0 && // needs a free slot and no unanswered fetch
+    (!inflight_q || next_addr_from_desc_valid_this_cycle);
+
+always_comb begin : proc_inflight // set by the AR handshake, cleared when a next address arrives
+    inflight_d = inflight_q;
+    if (axi_ar_chan_ready_i && axi_ar_chan_valid_o) begin
+        inflight_d = 1'b1;
+    end else if (next_addr_from_desc_valid_this_cycle) begin
+        inflight_d = 1'b0;
+    end
+end
+
+always_comb begin : proc_ready_valid // a queued address is popped by the same handshake that sends it
+    axi_ar_chan_valid_o = 1'b0;
+    queued_address_ready_o = 1'b0;
+    if (may_send_ar) begin
+        if (take_from_queued) begin
+            axi_ar_chan_valid_o = queued_address_valid_i;
+            queued_address_ready_o = axi_ar_chan_ready_i;
+        end else begin
+            axi_ar_chan_valid_o = 1'b1;
+        end
+    end
+end
+
+always_comb begin : proc_ar // all AR fields not set below stay zero
+    axi_ar_chan_o = '0;
+    axi_ar_chan_o.id = axi_ar_id_i;
+    axi_ar_chan_o.addr = ar_addr;
+    axi_ar_chan_o.len = AxiLength;
+    axi_ar_chan_o.size = AxiSize;
+    axi_ar_chan_o.burst = axi_pkg::BURST_INCR;
+end
+
+`FF(inflight_q, inflight_d, 1'b0);
+`FF(next_addr_from_desc_valid_q, next_addr_from_desc_valid_d, 1'b0);
+`FF(next_addr_q, next_addr_d, '1); // all-ones after reset, so the first fetch uses the queued address
+
+assign feedback_addr_o = ar_addr;
+assign feedback_addr_valid_o = axi_ar_chan_ready_i && axi_ar_chan_valid_o; // pulses with each AR sent
+assign busy_o = !take_from_queued || inflight_q;
+
+endmodule : idma_desc64_ar_gen
diff --git a/src/frontends/desc64/idma_desc64_ar_gen_prefetch.sv b/src/frontends/desc64/idma_desc64_ar_gen_prefetch.sv
new file mode 100644
index 00000000..d2b1f142
--- /dev/null
+++ b/src/frontends/desc64/idma_desc64_ar_gen_prefetch.sv
@@ -0,0 +1,308 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+
+// Axel Vanoni
+
+`include "common_cells/assertions.svh"
+`include "common_cells/registers.svh"
+
+/// This module generates AR packets to fetch descriptors from memory and may prefetch ahead
+module idma_desc64_ar_gen_prefetch #(
+    /// AXI Data width
+    parameter int unsigned DataWidth = 64,
+    /// How many descriptors may be prefetched
+    parameter int unsigned NSpeculation = 0,
+    /// Descriptor type. `$bits(descriptor_t)` must be a power of two
+    parameter type descriptor_t = logic,
+    /// AXI AR channel type
+    parameter type axi_ar_chan_t = logic,
+    /// AXI AR id type
+    parameter type axi_id_t = logic,
+    /// Type that can hold the usage information of the idma_req fifo
+    parameter type usage_t = logic,
+    /// AXI Address type
+    parameter type addr_t = logic,
+    /// Type that can hold how many descriptors to flush on the R channel.
+    /// Do not override.
+    parameter type flush_t = logic [$clog2(NSpeculation + 1)-1:0]
+)(
+    /// Clock
+    input  logic         clk_i,
+    /// Reset
+    input  logic         rst_ni,
+    /// AXI AR channel
+    output axi_ar_chan_t axi_ar_chan_o,
+    /// AXI AR valid
+    output logic         axi_ar_chan_valid_o,
+    /// AXI AR ready
+    input  logic         axi_ar_chan_ready_i,
+    /// AXI ID to use when requesting
+    input  axi_id_t      axi_ar_id_i,
+    /// queued address to use when we reach the last in a chain
+    input  addr_t        queued_address_i,
+    /// queued address valid
+    input  logic         queued_address_valid_i,
+    /// queued address ready
+    output logic         queued_address_ready_o,
+    /// next address as read from descriptor
+    input  addr_t        next_address_from_descriptor_i,
+    /// next address valid
+    input  logic         next_address_from_descriptor_valid_i,
+    /// number of available slots in the idma request fifo
+    input  usage_t       idma_req_available_slots_i,
+    /// number of requests to flush on the R channel
+    output flush_t       n_requests_to_flush_o,
+    /// if asserted, flush `n_requests_to_flush_o` on the R channel
+    output logic         n_requests_to_flush_valid_o,
+    /// address for feedback for the next request
+    output addr_t        feedback_addr_o,
+    /// feedback address valid
+    output logic         feedback_addr_valid_o,
+    /// whether the unit is busy
+    output logic         busy_o
+);
+
+`define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+localparam int unsigned DataWidthBytes = DataWidth / 8;
+localparam int unsigned DescriptorSize = $bits(descriptor_t) / 8;
+
+// We need the descriptor to have a power of two size for easy multiplication
+// when calculating the next address
+// pragma translate_off
+`ASSERT_INIT(DescriptorSizeIsPowerOfTwo, (32'd1 << $clog2(DescriptorSize)) == DescriptorSize)
+// pragma translate_on
+
+localparam logic [2:0] AxiSize = `MIN(`MIN($clog2(DataWidthBytes), $clog2(DescriptorSize)), 3'b111);
+// A descriptor no wider than the bus is a single beat (prevents the `0 - 1` wrap to 8'hFF)
+localparam logic [7:0] AxiLength = (DescriptorSize > DataWidthBytes) ? DescriptorSize / DataWidthBytes - 1 : 0;
+
+localparam int unsigned SpeculationWidth = $clog2(NSpeculation + 1);
+localparam int unsigned SpeculationUsageWidth = $clog2(NSpeculation);
+
+typedef struct packed {
+    logic speculative;
+    addr_t addr;
+} addr_spec_t;
+
+addr_t base_addr_q, base_addr_d;
+logic base_valid_q, base_valid_d;
+logic take_from_next;
+
+logic unblocked;
+logic next_addr_valid_q, next_addr_valid_d;
+logic next_addr_valid_this_cycle;
+
+addr_spec_t next_ar;
+logic next_ar_valid, next_ar_ready;
+
+addr_spec_t staging_addr;
+logic staging_addr_valid_pending, staging_addr_ready_pending;
+addr_t staging_addr_legalization;
+logic staging_addr_valid_legalization, staging_addr_ready_legalization;
+logic staging_addr_valid_speculation, staging_addr_ready_speculation;
+
+addr_t addr_out;
+
+
+logic [SpeculationWidth:0] inflight_counter_q, inflight_counter_d;
+logic flush;
+logic flush_d, flush_q;
+logic commit;
+logic speculation_correct;
+logic legalization_usage;
+logic idma_enough_slots;
+addr_t speculation_addr;
+addr_t speculation_check_addr;
+logic speculation_ready, speculation_valid;
+logic [SpeculationUsageWidth-1:0] speculation_usage_short;
+logic [SpeculationWidth-1:0] speculation_usage;
+
+assign take_from_next = base_valid_q && (next_address_from_descriptor_i != '1); // not all-ones
+
+assign unblocked = (NSpeculation > inflight_counter_q) && // room in the speculation window ...
+                   (idma_req_available_slots_i > inflight_counter_q); // ... and in the idma_req fifo
+
+assign next_ar_valid = unblocked && base_valid_q;
+assign next_ar.speculative = inflight_counter_q > 0; // anything behind an in-flight request is a guess
+assign next_ar.addr = base_addr_q + (inflight_counter_q << $clog2(DescriptorSize)); // contiguous guess
+
+assign staging_addr_valid_legalization = flush ? idma_req_available_slots_i > '0 && (next_address_from_descriptor_i == '1 ?
+    queued_address_valid_i : 1'b1) :
+    staging_addr_valid_pending &&
+    ((staging_addr_ready_speculation && !flush_q) ||
+    !staging_addr.speculative);
+assign staging_addr_ready_pending = staging_addr_ready_legalization &&
+    ((staging_addr_ready_speculation && !flush_q) ||
+    !staging_addr.speculative) &&
+    !flush;
+assign staging_addr_valid_speculation = staging_addr_valid_pending &&
+    staging_addr_ready_legalization &&
+    staging_addr.speculative &&
+    !flush && !flush_q;
+
+assign next_addr_valid_d = next_address_from_descriptor_valid_i;
+assign next_addr_valid_this_cycle = next_address_from_descriptor_valid_i && !next_addr_valid_q; // rising edge
+
+assign staging_addr_legalization = flush ? ( // on a flush, send the address the descriptor really named
+    next_address_from_descriptor_i == '1 ? queued_address_i : next_address_from_descriptor_i
+    ) : staging_addr.addr;
+
+assign speculation_check_addr = speculation_valid ? speculation_addr : next_ar.addr;
+
+assign speculation_correct = next_address_from_descriptor_i == '1 ?
+    (queued_address_valid_i && speculation_check_addr == queued_address_i) :
+    speculation_check_addr == next_address_from_descriptor_i;
+
+assign flush = next_addr_valid_this_cycle && !speculation_correct; // guess was wrong
+assign commit = next_addr_valid_this_cycle && speculation_correct; // guess was right
+
+assign speculation_ready = commit;
+
+assign idma_enough_slots = idma_req_available_slots_i > inflight_counter_q && // NOTE: not read in this module
+    inflight_counter_q < NSpeculation;
+
+// handle case of NSpeculation being power of 2
+always_comb begin : proc_usage
+    speculation_usage = speculation_usage_short;
+    // we can't distinguish between max and empty if readys and valids are on
+    // at the same time!
+    if (speculation_usage_short == '0 && speculation_valid) begin
+        speculation_usage = NSpeculation;
+    end
+end
+
+always_comb begin : proc_base_valid
+    base_valid_d = base_valid_q;
+    if (queued_address_valid_i) begin
+        base_valid_d = 1'b1;
+    end else if (!queued_address_valid_i &&
+        next_addr_valid_this_cycle && next_address_from_descriptor_i == '1) begin
+        base_valid_d = 1'b0;
+    end
+end
+
+always_comb begin : proc_base_addr
+    base_addr_d = base_addr_q;
+    if (take_from_next && next_addr_valid_this_cycle) begin
+        if (next_addr_valid_this_cycle) begin
+            base_addr_d = next_address_from_descriptor_i;
+        end
+    end else if ((!take_from_next && next_addr_valid_this_cycle) ||
+        !base_valid_q) begin
+        if (queued_address_valid_i) begin
+            base_addr_d = queued_address_i;
+        end
+    end
+end
+
+always_comb begin : proc_inflight_counter
+    inflight_counter_d = inflight_counter_q;
+    if (flush) begin
+        inflight_counter_d = (staging_addr_valid_legalization && staging_addr_ready_legalization);
+    end else begin
+        inflight_counter_d = inflight_counter_q + (next_ar_valid && next_ar_ready) - commit;
+    end
+end
+
+always_comb begin : proc_feedback_addr
+    // Normally, the next feedback address is the one we're committing.
+    feedback_addr_o = speculation_addr;
+    feedback_addr_valid_o = commit && speculation_valid;
+    // After a flush or when starting fresh however, we have a first address
+    // that is known and doesn't pass through the speculation buffer. We need
+    // to pass that address through in that case.
+    if (!flush) begin
+        if (!staging_addr.speculative &&
+            staging_addr_valid_legalization &&
+            staging_addr_ready_legalization) begin
+
+            feedback_addr_o = staging_addr.addr;
+            feedback_addr_valid_o = 1'b1;
+        end
+    end else begin
+        feedback_addr_o = staging_addr_legalization;
+        feedback_addr_valid_o = staging_addr_valid_legalization && staging_addr_ready_legalization;
+    end
+end
+
+assign queued_address_ready_o = !take_from_next && (!base_valid_q || next_addr_valid_this_cycle);
+
+`FF(inflight_counter_q, inflight_counter_d, '0);
+`FF(base_addr_q, base_addr_d, '0);
+`FF(next_addr_valid_q, next_addr_valid_d, 1'b0);
+`FF(base_valid_q, base_valid_d, 1'b0);
+`FF(flush_q, flush_d, 1'b0);
+assign flush_d = flush;
+/* `FF(speculation_usage_q, speculation_usage_d, '0); */
+/* assign speculation_usage_d = flush ? '0 : speculation_usage; */
+
+stream_fifo #( // speculative addresses waiting to be confirmed (popped on commit, cleared after a flush)
+    .FALL_THROUGH(1'b1),
+    .DEPTH       (NSpeculation),
+    .T           (addr_t)
+) i_speculation_fifo (
+    .clk_i,
+    .rst_ni,
+    .flush_i   (flush_q),
+    .testmode_i(1'b0),
+    .usage_o   (speculation_usage_short),
+    .data_i    (staging_addr.addr),
+    .valid_i   (staging_addr_valid_speculation),
+    .ready_o   (staging_addr_ready_speculation),
+    .data_o    (speculation_addr),
+    .valid_o   (speculation_valid),
+    .ready_i   (speculation_ready)
+);
+
+stream_fifo #( // generated addresses that have not yet been handed to the output stage
+    .FALL_THROUGH(1'b1),
+    .DEPTH       (NSpeculation),
+    .T           (addr_spec_t)
+) i_pending_ars (
+    .clk_i,
+    .rst_ni,
+    .flush_i   (flush),
+    .testmode_i(1'b0),
+    .usage_o   ( /* unconnected */ ),
+    .data_i    (next_ar),
+    .valid_i   (next_ar_valid),
+    .ready_o   (next_ar_ready),
+    .data_o    (staging_addr),
+    .valid_o   (staging_addr_valid_pending),
+    .ready_i   (staging_addr_ready_pending)
+);
+
+stream_fifo #( // single-entry output stage that drives the AR channel
+    .FALL_THROUGH(1'b1),
+    .DEPTH       (1),
+    .T           (addr_t)
+) i_legalization_fifo (
+    .clk_i,
+    .rst_ni,
+    .flush_i   (1'b0),
+    .testmode_i(1'b0),
+    .usage_o   (legalization_usage),
+    .data_i    (staging_addr_legalization),
+    .valid_i   (staging_addr_valid_legalization),
+    .ready_o   (staging_addr_ready_legalization),
+    .data_o    (addr_out),
+    .valid_o   (axi_ar_chan_valid_o),
+    .ready_i   (axi_ar_chan_ready_i)
+);
+
+assign n_requests_to_flush_o = speculation_usage;
+assign n_requests_to_flush_valid_o = flush;
+assign busy_o = base_valid_q || inflight_counter_q > '0;
+
+always_comb begin : proc_ar // all AR fields not set below stay zero
+    axi_ar_chan_o = '0;
+    axi_ar_chan_o.id = axi_ar_id_i;
+    axi_ar_chan_o.addr = addr_out;
+    axi_ar_chan_o.len = AxiLength;
+    axi_ar_chan_o.size = AxiSize;
+    axi_ar_chan_o.burst = axi_pkg::BURST_INCR;
+end
+
+endmodule : idma_desc64_ar_gen_prefetch
diff --git a/src/frontends/desc64/idma_desc64_reader.sv b/src/frontends/desc64/idma_desc64_reader.sv
new file mode 100644
index 00000000..849c55b0
--- /dev/null
+++ b/src/frontends/desc64/idma_desc64_reader.sv
@@ -0,0 +1,177 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+
+// Axel Vanoni
+
+`include "common_cells/registers.svh"
+
+/// This module takes in an AXI R-channel, and reads descriptors from it.
+/// Data paths exist for DataWidth = 32, 64, 128 and 256. Note that using an
+/// address width other than 64 bits will need modifications.
+module idma_desc64_reader #(
+    /// Address width of the AXI bus
+    parameter int unsigned AddrWidth = 64,
+    /// Data width of the AXI bus
+    parameter int unsigned DataWidth = 64,
+    /// iDMA request type
+    parameter type idma_req_t = logic,
+    /// AXI R channel type
+    parameter type axi_r_chan_t = logic,
+    /// Configuration descriptor type
+    parameter type descriptor_t = logic,
+    /// AXI bus address type, derived from the address width
+    parameter type addr_t = logic [AddrWidth-1:0]
+)(
+    /// clock
+    input  logic        clk_i,
+    /// reset
+    input  logic        rst_ni,
+    /// axi read channel
+    input  axi_r_chan_t r_chan_i,
+    /// read channel valid
+    input  logic        r_chan_valid_i,
+    /// read channel ready
+    output logic        r_chan_ready_o,
+    /// idma request
+    output idma_req_t   idma_req_o,
+    /// idma request valid
+    output logic        idma_req_valid_o,
+    /// idma request ready
+    /// NOTE: we assume that if a read was launched,
+    /// the connected fifo has still space left, i.e. this signal is always
+    /// 1 if a request is in-flight. If a request is in-flight and there
+    /// is not enough space in the fifo, we will either stall the bus or
+    /// drop the request.
+    input  logic        idma_req_ready_i,
+    /// location of the next descriptor address
+    output addr_t       next_descriptor_addr_o,
+    /// whether next_descriptor_addr is valid
+    output logic        next_descriptor_addr_valid_o,
+    /// whether this descriptor needs an IRQ raised
+    output logic        do_irq_o,
+    /// whether do_irq_o is valid
+    output logic        do_irq_valid_o,
+    /// whether a request is in-flight
+    output logic        idma_req_inflight_o
+);
+
+descriptor_t current_descriptor;
+
+if (DataWidth == 256) begin : gen_256_data_path // a single beat carries the whole descriptor
+    assign current_descriptor           = r_chan_i.data;
+    assign idma_req_valid_o             = r_chan_valid_i;
+    assign next_descriptor_addr_valid_o = r_chan_valid_i;
+    assign do_irq_valid_o               = r_chan_valid_i;
+    assign idma_req_inflight_o          = r_chan_valid_i;
+end else if (DataWidth == 128) begin : gen_128_data_path // first beat is registered, second is live
+    logic [127:0] first_half_of_descriptor_q, first_half_of_descriptor_d;
+    logic [127:0] second_half_of_descriptor;
+    logic irq_addr_valid_q, irq_addr_valid_d;
+
+    assign idma_req_valid_o             = r_chan_valid_i && r_chan_i.last;
+    assign next_descriptor_addr_valid_o = irq_addr_valid_q;
+    assign do_irq_valid_o               = irq_addr_valid_q;
+    assign idma_req_inflight_o          = r_chan_valid_i || irq_addr_valid_q;
+    assign second_half_of_descriptor    = r_chan_i.data; // taken straight from the bus, like the last beat below
+
+    assign current_descriptor = descriptor_t'({first_half_of_descriptor_q, second_half_of_descriptor});
+
+    always_comb begin
+        first_half_of_descriptor_d = first_half_of_descriptor_q;
+        if (r_chan_valid_i && r_chan_ready_o && !r_chan_i.last) begin
+            first_half_of_descriptor_d = r_chan_i.data;
+        end
+    end
+
+    always_comb begin
+        // the irq and next address fields are valid from receiving the first
+        // half until the second half was received
+        irq_addr_valid_d = irq_addr_valid_q;
+        if (r_chan_valid_i && r_chan_ready_o) begin
+            irq_addr_valid_d = !r_chan_i.last;
+        end
+    end
+
+    `FF(first_half_of_descriptor_q, first_half_of_descriptor_d, 128'b0);
+    `FF(irq_addr_valid_q, irq_addr_valid_d, 1'b0);
+end else if (DataWidth == 64) begin : gen_64_data_path // three beats are registered, the fourth is live
+    logic [1:0] fetch_counter_q, fetch_counter_d;
+    logic [2:0][63:0] descriptor_data_q, descriptor_data_d;
+    logic [63:0] descriptor_data_last;
+
+    assign idma_req_valid_o             = r_chan_valid_i && r_chan_i.last;
+    assign do_irq_valid_o               = fetch_counter_q == 2'b01; // one beat stored
+    assign next_descriptor_addr_valid_o = fetch_counter_q == 2'b10; // two beats stored
+    assign descriptor_data_last         = r_chan_i.data;
+    assign idma_req_inflight_o          = fetch_counter_q != 2'b00;
+
+    assign current_descriptor = {
+        descriptor_data_q[0],
+        descriptor_data_q[1],
+        descriptor_data_q[2],
+        descriptor_data_last
+    };
+
+    always_comb begin : proc_fetch_data
+        descriptor_data_d = descriptor_data_q;
+        fetch_counter_d   = fetch_counter_q;
+        if (r_chan_valid_i && r_chan_ready_o && !r_chan_i.last) begin
+            descriptor_data_d[fetch_counter_q] = r_chan_i.data;
+            fetch_counter_d = fetch_counter_q + 2'b01;
+        end else if (r_chan_valid_i && r_chan_i.last) begin // the two conditions are mutually exclusive
+            fetch_counter_d = 2'b00;
+        end
+    end
+
+    `FF(descriptor_data_q, descriptor_data_d, 192'b0);
+    `FF(fetch_counter_q, fetch_counter_d, 2'b0);
+end else if (DataWidth == 32) begin : gen_32_data_path // seven beats are registered, the eighth is live
+    logic [2:0] fetch_counter_q, fetch_counter_d;
+    logic [6:0][31:0] descriptor_data_q, descriptor_data_d;
+    logic [31:0] descriptor_data_last;
+
+    assign idma_req_valid_o             = r_chan_valid_i && r_chan_i.last;
+    assign do_irq_valid_o               = fetch_counter_q == 3'd2; // two beats stored
+    assign next_descriptor_addr_valid_o = fetch_counter_q == 3'd4; // four beats stored
+    assign descriptor_data_last         = r_chan_i.data;
+    assign idma_req_inflight_o          = fetch_counter_q != 3'd0;
+
+    // NOTE(review): this places beat 6 in the most significant bits, whereas the 64-bit
+    // path places beat 0 there -- confirm the intended beat order against descriptor_t.
+    assign current_descriptor = {descriptor_data_q, descriptor_data_last};
+
+    always_comb begin : proc_fetch_data
+        descriptor_data_d = descriptor_data_q;
+        fetch_counter_d   = fetch_counter_q;
+        if (r_chan_valid_i && r_chan_ready_o && !r_chan_i.last) begin
+            descriptor_data_d[fetch_counter_q] = r_chan_i.data;
+            fetch_counter_d = fetch_counter_q + 3'b001;
+        end else if (r_chan_valid_i && r_chan_i.last) begin // the two conditions are mutually exclusive
+            fetch_counter_d = 3'b0;
+        end
+    end
+
+    // state registers for the beat buffer and the beat counter, as in the 64-bit path
+    `FF(descriptor_data_q, descriptor_data_d, 224'b0);
+    `FF(fetch_counter_q, fetch_counter_d, 3'b0);
+end
+
+idma_desc64_reshaper #(
+    .idma_req_t  (idma_req_t),
+    .addr_t      (addr_t),
+    .descriptor_t(descriptor_t)
+) i_descriptor_reshaper (
+    .descriptor_i (current_descriptor),
+    .idma_req_o,
+    .next_addr_o  (next_descriptor_addr_o),
+    .do_irq_o
+);
+
+// The user should take care that the connected fifo always has
+// enough space to put in the new descriptor. If it does not,
+// instead of dropping requests, stall the bus (unless we're
+// dropping this descriptor).
+assign r_chan_ready_o = idma_req_ready_i;
+
+endmodule : idma_desc64_reader
diff --git a/src/frontends/desc64/idma_desc64_reader_gater.sv b/src/frontends/desc64/idma_desc64_reader_gater.sv
new file mode 100644
index 00000000..a0ce2aea
--- /dev/null
+++ b/src/frontends/desc64/idma_desc64_reader_gater.sv
@@ -0,0 +1,55 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+
+// Axel Vanoni
+
+`include "common_cells/registers.svh"
+
+module idma_desc64_reader_gater #(
+    parameter type flush_t = logic
+)(
+    input logic clk_i,
+    input logic rst_ni,
+    input flush_t n_to_flush_i,
+    input logic n_to_flush_valid_i,
+    input logic r_valid_i,
+    output logic r_valid_o,
+    input logic r_ready_i,
+    output logic r_ready_o,
+    input logic r_last_i
+);
+
+flush_t n_to_flush_q, n_to_flush_d;
+logic flush;
+logic engage_q, engage_d;
+
+assign flush = engage_q && (n_to_flush_q > '0 || (n_to_flush_valid_i && n_to_flush_i > '0));
+
+// engange gating only after the last r transaction is done
+always_comb begin
+    engage_d = engage_q;
+    if (n_to_flush_valid_i || n_to_flush_q == '0) begin
+        engage_d = 1'b0;
+    end else if (r_last_i && r_valid_i && r_ready_i) begin
+        engage_d = 1'b1;
+    end
+end
+
+always_comb begin
+    n_to_flush_d = n_to_flush_q;
+    if (r_last_i && r_valid_i && n_to_flush_q > '0 && engage_q) begin
+        n_to_flush_d = n_to_flush_q - 1'b1;
+    end
+    if (n_to_flush_valid_i) begin
+        n_to_flush_d = n_to_flush_i;
+    end
+end
+
+`FF(n_to_flush_q, n_to_flush_d, 'b0);
+`FF(engage_q, engage_d, 'b0); + +assign r_valid_o = flush ? 1'b0 : r_valid_i; +assign r_ready_o = flush ? 1'b1 : r_ready_i; + +endmodule : idma_desc64_reader_gater diff --git a/src/frontends/desc64/idma_desc64_reg_wrapper.sv b/src/frontends/desc64/idma_desc64_reg_wrapper.sv index 24457b24..0f7ac051 100644 --- a/src/frontends/desc64/idma_desc64_reg_wrapper.sv +++ b/src/frontends/desc64/idma_desc64_reg_wrapper.sv @@ -16,22 +16,22 @@ import idma_desc64_reg_pkg::idma_desc64_hw2reg_t; #( parameter type reg_req_t = logic, parameter type reg_rsp_t = logic ) ( - input logic clk_i , - input logic rst_ni , - input reg_req_t reg_req_i , - output reg_rsp_t reg_rsp_o , - output idma_desc64_reg2hw_t reg2hw_o , - input idma_desc64_hw2reg_t hw2reg_i , - input logic devmode_i , - input logic descriptor_fifo_ready_i, - output logic descriptor_fifo_valid_o + input logic clk_i , + input logic rst_ni , + input reg_req_t reg_req_i , + output reg_rsp_t reg_rsp_o , + output idma_desc64_reg2hw_t reg2hw_o , + input idma_desc64_hw2reg_t hw2reg_i , + input logic devmode_i , + output logic input_addr_valid_o, + input logic input_addr_ready_i ); import idma_desc64_reg_pkg::IDMA_DESC64_DESC_ADDR_OFFSET; reg_req_t request; reg_rsp_t response; - logic descriptor_fifo_valid_q, descriptor_fifo_valid_d; + logic input_addr_valid_q, input_addr_valid_d; idma_desc64_reg_top #( .reg_req_t (reg_req_t), @@ -55,7 +55,7 @@ import idma_desc64_reg_pkg::idma_desc64_hw2reg_t; #( always_comb begin if (reg_req_i.addr == IDMA_DESC64_DESC_ADDR_OFFSET) begin - request.valid = reg_req_i.valid && descriptor_fifo_ready_i; + request.valid = reg_req_i.valid && input_addr_ready_i; end else begin request.valid = reg_req_i.valid; end @@ -64,22 +64,22 @@ import idma_desc64_reg_pkg::idma_desc64_hw2reg_t; #( always_comb begin // only take into account the fifo if a write is going to it if (reg_req_i.addr == IDMA_DESC64_DESC_ADDR_OFFSET) begin - reg_rsp_o.ready = response.ready && descriptor_fifo_ready_i; - 
descriptor_fifo_valid_o = descriptor_fifo_valid_q; + reg_rsp_o.ready = response.ready && input_addr_ready_i; + input_addr_valid_o = reg2hw_o.desc_addr.qe || input_addr_valid_q; end else begin reg_rsp_o.ready = response.ready; - descriptor_fifo_valid_o = '0; + input_addr_valid_o = '0; end end always_comb begin - descriptor_fifo_valid_d = descriptor_fifo_valid_q; - if (reg2hw_o.desc_addr.qe) begin - descriptor_fifo_valid_d = 1'b1; - end else if (descriptor_fifo_ready_i) begin - descriptor_fifo_valid_d = '0; + input_addr_valid_d = input_addr_valid_q; + if (reg2hw_o.desc_addr.qe && !input_addr_ready_i) begin + input_addr_valid_d = 1'b1; + end else if (input_addr_ready_i) begin + input_addr_valid_d = '0; end end - `FF(descriptor_fifo_valid_q, descriptor_fifo_valid_d, '0); + `FF(input_addr_valid_q, input_addr_valid_d, '0); endmodule diff --git a/src/frontends/desc64/idma_desc64_reshaper.sv b/src/frontends/desc64/idma_desc64_reshaper.sv new file mode 100644 index 00000000..213f61bb --- /dev/null +++ b/src/frontends/desc64/idma_desc64_reshaper.sv @@ -0,0 +1,62 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 + +// Axel Vanoni + +/// This module reshapes the 256 bits of a descriptor into its corresponding +/// iDMA backend request +module idma_desc64_reshaper #( + parameter type idma_req_t = logic, + parameter type addr_t = logic, + parameter type descriptor_t = logic +)( + input descriptor_t descriptor_i, + output idma_req_t idma_req_o, + output addr_t next_addr_o, + output logic do_irq_o +); + +assign next_addr_o = descriptor_i.next; +assign do_irq_o = descriptor_i.flags[0]; + +always_comb begin + idma_req_o = '0; + + idma_req_o.length = descriptor_i.length; + idma_req_o.src_addr = descriptor_i.src_addr; + idma_req_o.dst_addr = descriptor_i.dest_addr; + + // Current backend only supports one ID + idma_req_o.opt.axi_id = descriptor_i.flags[23:16]; + idma_req_o.opt.src.burst = descriptor_i.flags[2:1]; + idma_req_o.opt.src.cache = descriptor_i.flags[11:8]; + // AXI4 does not support locked transactions, use atomics + idma_req_o.opt.src.lock = '0; + // unprivileged, secure, data access + idma_req_o.opt.src.prot = '0; + // not participating in qos + idma_req_o.opt.src.qos = '0; + // only one region + idma_req_o.opt.src.region = '0; + idma_req_o.opt.dst.burst = descriptor_i.flags[4:3]; + idma_req_o.opt.dst.cache = descriptor_i.flags[15:12]; + // AXI4 does not support locked transactions, use atomics + idma_req_o.opt.dst.lock = '0; + // unprivileged, secure, data access + idma_req_o.opt.dst.prot = '0; + // not participating in qos + idma_req_o.opt.dst.qos = '0; + // only one region in system + idma_req_o.opt.dst.region = '0; + idma_req_o.opt.beo.decouple_aw = descriptor_i.flags[6]; // NOTE(review): the descriptor_t comment in idma_desc64_top describes flags bit 6 as "serialize requests", but it drives decouple_aw here; confirm which is intended + idma_req_o.opt.beo.decouple_rw = descriptor_i.flags[5]; + // this frontend currently only supports completely debursting + idma_req_o.opt.beo.src_max_llen = '0; + // this frontend currently only supports completely debursting + idma_req_o.opt.beo.dst_max_llen = '0; + idma_req_o.opt.beo.src_reduce_len = descriptor_i.flags[7]; + 
idma_req_o.opt.beo.dst_reduce_len = descriptor_i.flags[7]; +end + +endmodule : idma_desc64_reshaper diff --git a/src/frontends/desc64/idma_desc64_shared_counter.sv b/src/frontends/desc64/idma_desc64_shared_counter.sv deleted file mode 100644 index 52da6481..00000000 --- a/src/frontends/desc64/idma_desc64_shared_counter.sv +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2022 ETH Zurich and University of Bologna. -// Solderpad Hardware License, Version 0.51, see LICENSE for details. -// SPDX-License-Identifier: SHL-0.51 - -// Axel Vanoni - -`include "common_cells/registers.svh" -/// This module allows two domains to share a counter -/// One end can increment the counter, the other can -/// decrement it. This can be used as a lightweight -/// FIFO if the only data that would be transmitted is 1 -/// Note that the counter wraps on overflow, but saturates -/// on underflow -module idma_desc64_shared_counter #( - parameter int unsigned CounterWidth = 4 -) ( - input logic clk_i , - input logic rst_ni , - /// Whether the internal counter should increment - input logic increment_i , - /// Whether the internal counter should decrement - input logic decrement_i , - /// Whether the internal counter is above zero - output logic greater_than_zero_o -); - -typedef logic [CounterWidth-1:0] counter_t; - -counter_t counter_d, counter_q; -`FF(counter_q, counter_d, '0); - -assign greater_than_zero_o = counter_q != '0; - -always_comb begin - counter_d = counter_q; - unique casez ({increment_i, decrement_i, counter_q != 0}) - 3'b11?: begin - counter_d = counter_q; - end - 3'b10?: begin - counter_d = counter_q + 1; - end - 3'b011: begin - counter_d = counter_q - 1; - end - 3'b010: begin - // don't underflow - counter_d = counter_q; - end - 3'b00?: begin - counter_d = counter_q; - end - default: ; - endcase -end - -endmodule diff --git a/src/frontends/desc64/idma_desc64_synth.sv b/src/frontends/desc64/idma_desc64_synth.sv index 1488d310..e5dcc04a 100644 --- 
a/src/frontends/desc64/idma_desc64_synth.sv +++ b/src/frontends/desc64/idma_desc64_synth.sv @@ -6,43 +6,69 @@ // synth wrapper module idma_desc64_synth #( - parameter int unsigned AddrWidth = idma_desc64_synth_pkg::AddrWidth, - parameter type burst_req_t = idma_desc64_synth_pkg::burst_req_t, - parameter type reg_rsp_t = idma_desc64_synth_pkg::reg_rsp_t, - parameter type reg_req_t = idma_desc64_synth_pkg::reg_req_t + parameter int unsigned AddrWidth = idma_desc64_synth_pkg::AddrWidth, + parameter int unsigned DataWidth = idma_desc64_synth_pkg::DataWidth, + parameter int unsigned AxiIdWidth = idma_desc64_synth_pkg::IdWidth, + parameter type idma_req_t = idma_desc64_synth_pkg::idma_req_t, + parameter type idma_rsp_t = idma_desc64_synth_pkg::idma_rsp_t, + parameter type axi_rsp_t = idma_desc64_synth_pkg::axi_rsp_t, + parameter type axi_req_t = idma_desc64_synth_pkg::axi_req_t, + parameter type axi_ar_chan_t = idma_desc64_synth_pkg::axi_ar_chan_t, + parameter type axi_r_chan_t = idma_desc64_synth_pkg::axi_r_chan_t, + parameter type reg_rsp_t = idma_desc64_synth_pkg::reg_rsp_t, + parameter type reg_req_t = idma_desc64_synth_pkg::reg_req_t, + parameter int unsigned InputFifoDepth = idma_desc64_synth_pkg::InputFifoDepth, + parameter int unsigned PendingFifoDepth = idma_desc64_synth_pkg::PendingFifoDepth )( - input logic clk_i, - input logic rst_ni, - output reg_req_t master_req_o, - input reg_rsp_t master_rsp_i, - input reg_req_t slave_req_i, - output reg_rsp_t slave_rsp_o, - output burst_req_t dma_be_req_o, - output logic dma_be_valid_o, - input logic dma_be_ready_i, - input logic dma_be_tx_complete_i, - input logic dma_be_idle_i, - output logic irq_o + input logic clk_i , + input logic rst_ni , + output axi_req_t master_req_o , + input axi_rsp_t master_rsp_i , + input logic [AxiIdWidth-1:0] axi_ar_id_i , + input logic [AxiIdWidth-1:0] axi_aw_id_i , + input reg_req_t slave_req_i , + output reg_rsp_t slave_rsp_o , + output idma_req_t idma_req_o , + output logic 
idma_req_valid_o, + input logic idma_req_ready_i, + input idma_rsp_t idma_rsp_i , + input logic idma_rsp_valid_i, + output logic idma_rsp_ready_o, + input logic idma_busy_i , + output logic irq_o ); - idma_desc64_top #( - .AddrWidth ( AddrWidth ), - .burst_req_t ( burst_req_t ), - .reg_rsp_t ( reg_rsp_t ), - .reg_req_t ( reg_req_t ) - ) i_idma_desc64 ( - .clk_i, - .rst_ni, - .master_req_o, - .master_rsp_i, - .slave_req_i, - .slave_rsp_o, - .dma_be_req_o, - .dma_be_valid_o, - .dma_be_ready_i, - .dma_be_tx_complete_i, - .dma_be_idle_i, - .irq_o - ); + idma_desc64_top #( + .AddrWidth, + .DataWidth, + .AxiIdWidth, + .idma_req_t, + .idma_rsp_t, + .axi_req_t, + .axi_rsp_t, + .axi_ar_chan_t, + .axi_r_chan_t, + .reg_req_t, + .reg_rsp_t, + .InputFifoDepth, + .PendingFifoDepth + ) i_dma_desc64 ( + .clk_i , + .rst_ni , + .master_req_o , + .master_rsp_i , + .axi_ar_id_i , + .axi_aw_id_i , + .slave_req_i , + .slave_rsp_o , + .idma_req_o , + .idma_req_valid_o, + .idma_req_ready_i, + .idma_rsp_i , + .idma_rsp_valid_i, + .idma_rsp_ready_o, + .idma_busy_i , + .irq_o + ); endmodule : idma_desc64_synth diff --git a/src/frontends/desc64/idma_desc64_synth_pkg.sv b/src/frontends/desc64/idma_desc64_synth_pkg.sv index ba08d9a4..66c23d01 100644 --- a/src/frontends/desc64/idma_desc64_synth_pkg.sv +++ b/src/frontends/desc64/idma_desc64_synth_pkg.sv @@ -8,33 +8,29 @@ package idma_desc64_synth_pkg; `include "register_interface/typedef.svh" - - localparam int unsigned AddrWidth = 64; - localparam int unsigned DataWidth = 64; - localparam int unsigned StrbWidth = DataWidth / 8; - localparam int unsigned OneDLength = 32; - localparam int unsigned IdWidth = 8; + `include "idma/typedef.svh" + + localparam int unsigned AddrWidth = 64; + localparam int unsigned DataWidth = 64; + localparam int unsigned StrbWidth = DataWidth / 8; + localparam int unsigned OneDLength = 32; + localparam int unsigned IdWidth = 8; + localparam int unsigned UserWidth = 1; + localparam int unsigned TFLenWidth = 32; + 
localparam int unsigned InputFifoDepth = 8; + localparam int unsigned PendingFifoDepth = 8; typedef logic [AddrWidth-1:0] addr_t; typedef logic [DataWidth-1:0] data_t; typedef logic [StrbWidth-1:0] strb_t; typedef logic [OneDLength-1:0] length_t; typedef logic [IdWidth-1:0] id_t; + typedef logic [UserWidth-1:0] user_t; + typedef logic [TFLenWidth-1:0] tf_len_t; `REG_BUS_TYPEDEF_ALL(reg, addr_t, data_t, strb_t) - - typedef struct packed { - id_t id; - addr_t src; - addr_t dst; - length_t num_bytes; - axi_pkg::cache_t src_cache; - axi_pkg::cache_t dst_cache; - axi_pkg::burst_t src_burst; - axi_pkg::burst_t dst_burst; - logic decouple_rw; - logic deburst; - logic serialize; - } burst_req_t; + `AXI_TYPEDEF_ALL(axi, addr_t, id_t, data_t, strb_t, user_t) + `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, id_t, addr_t, tf_len_t) + `IDMA_TYPEDEF_FULL_RSP_T(idma_rsp_t, addr_t) endpackage : idma_desc64_synth_pkg diff --git a/src/frontends/desc64/idma_desc64_top.sv b/src/frontends/desc64/idma_desc64_top.sv index fdda4570..5874c5d9 100644 --- a/src/frontends/desc64/idma_desc64_top.sv +++ b/src/frontends/desc64/idma_desc64_top.sv @@ -5,531 +5,469 @@ // Axel Vanoni `include "common_cells/registers.svh" +`include "common_cells/assertions.svh" /// This module serves as a descriptor-based frontend for the iDMA in the CVA6-core module idma_desc64_top #( /// Width of the addresses parameter int unsigned AddrWidth = 64 , + /// Width of a data item on the AXI bus + parameter int unsigned DataWidth = 64 , + /// Width an AXI ID + parameter int unsigned AxiIdWidth = 3 , /// burst request type. See the documentation of the idma backend for details - parameter type burst_req_t = logic, + parameter type idma_req_t = logic, + /// burst response type. See the documentation of the idma backend for details + parameter type idma_rsp_t = logic, /// regbus interface types. 
Use the REG_BUS_TYPEDEF macros to define the types /// or see the idma backend documentation for more details parameter type reg_rsp_t = logic, parameter type reg_req_t = logic, + /// AXI interface types used for fetching descriptors. + /// Use the AXI_TYPEDEF_ALL macros to define the types + parameter type axi_rsp_t = logic, + parameter type axi_req_t = logic, + parameter type axi_ar_chan_t = logic, + parameter type axi_r_chan_t = logic, /// Specifies the depth of the fifo behind the descriptor address register - parameter int unsigned InputFifoDepth = 8, + parameter int unsigned InputFifoDepth = 8, /// Specifies the buffer size of the fifo that tracks requests submitted to the backend - parameter int unsigned PendingFifoDepth = 8, - /// Specifies the counter width of the buffer that tracks completions delivered by the backend - parameter int unsigned TxDoneBufferWidth = 5 + parameter int unsigned PendingFifoDepth = 8, + /// How many requests the backend might have at the same time in its buffers. 
+ /// Usually, `NumAxInFlight + BufferDepth` + parameter int unsigned BackendDepth = 0, + /// Specifies how many descriptors may be fetched speculatively + parameter int unsigned NSpeculation = 4 )( /// clock - input logic clk_i , + input logic clk_i , /// reset - input logic rst_ni , + input logic rst_ni , - /// regbus interface + /// axi interface used for fetching descriptors /// master pair /// master request - output reg_req_t master_req_o , + output axi_req_t master_req_o , /// master response - input reg_rsp_t master_rsp_i , + input axi_rsp_t master_rsp_i , + /// ID to be used by the read channel + input logic [AxiIdWidth-1:0] axi_ar_id_i , + /// ID to be used by the write channel + input logic [AxiIdWidth-1:0] axi_aw_id_i , + /// regbus interface /// slave pair /// The slave interface exposes two registers: One address register to /// write a descriptor address to process and a status register that /// exposes whether the DMA is busy on bit 0 and whether FIFOs are full /// on bit 1. /// master request - input reg_req_t slave_req_i , + input reg_req_t slave_req_i , /// master response - output reg_rsp_t slave_rsp_o , + output reg_rsp_t slave_rsp_o , /// backend interface /// burst request submission /// burst request data. 
See iDMA backend documentation for fields - output burst_req_t dma_be_req_o , + output idma_req_t idma_req_o , /// valid signal for the backend data submission - output logic dma_be_valid_o , + output logic idma_req_valid_o, /// ready signal for the backend data submission - input logic dma_be_ready_i , + input logic idma_req_ready_i, /// status information from the backend - /// event: when a transfer has completed - input logic dma_be_tx_complete_i, - /// whether the backend is currently idle - input logic dma_be_idle_i , + input idma_rsp_t idma_rsp_i , + /// valid signal for the backend response + input logic idma_rsp_valid_i, + /// ready signal for the backend response + output logic idma_rsp_ready_o, + /// whether the backend is currently busy + input logic idma_busy_i , /// Event: irq - output logic irq_o + output logic irq_o ); - import idma_desc64_reg_pkg::*; - import axi_pkg::BURST_INCR; - - // {{{ typedefs and parameters - typedef logic [AddrWidth-1:0] addr_t; - - /// Descriptor layout - typedef struct packed { - /// Flags for this request. Currently, the following are defined: - /// bit 0 set to trigger an irq on completion, unset to not be notified - /// bits 2:1 burst type for source, fixed: 00, incr: 01, wrap: 10 - /// bits 4:3 burst type for destination, fixed: 00, incr: 01, wrap: 10 - /// for a description of these modes, check AXI-Pulp documentation - /// bit 5 set to decouple reads and writes in the backend - /// bit 6 set to serialize requests. 
Not setting might violate AXI spec - /// bit 7 set to deburst (each burst is split into own transfer) - /// for a more thorough description, refer to the iDMA backend documentation - /// bits 11:8 Bitfield for AXI cache attributes for the source - /// bits 15:12 Bitfield for AXI cache attributes for the destination - /// bits of the bitfield (refer to AXI-Pulp for a description): - /// bit 0: cache bufferable - /// bit 1: cache modifiable - /// bit 2: cache read alloc - /// bit 3: cache write alloc - /// bits 23:16 AXI ID used for the transfer - /// bits 31:24 unused/reserved - logic [31:0] flags; - /// length of request in bytes - logic [31:0] length; - /// address of next descriptor, 0xFFFF_FFFF_FFFF_FFFF for last descriptor in chain - addr_t next; - /// source address to copy from - addr_t src_addr; - /// destination address to copy to - addr_t dest_addr; - } descriptor_t; - - typedef struct packed { - logic do_irq; - addr_t descriptor_addr; - } addr_irq_t; - - localparam addr_t AddressSentinel = ~'0; - - typedef enum logic [1:0] { - SubmitterIdle = '0, - SubmitterFetchDescriptor, - SubmitterSendToBE - } submitter_e; - - typedef enum logic [1:0] { - FeedbackIdle, - FeedbackWaitingOnBackend, - FeedbackUpdateMemory, - FeedbackRaiseIRQ - } feedback_fsm_e; - - // }}} typedefs and parameters - - // {{{ signal declarations - - // {{{ descriptor addr input to fifo - idma_desc64_reg2hw_t register_file_to_hw; - idma_desc64_hw2reg_t register_file_to_reg; - - addr_t desc_addr_to_input_fifo_data; - logic desc_addr_to_input_fifo_valid; - logic desc_addr_to_input_fifo_ready; - - addr_t desc_addr_from_input_fifo_data; - logic desc_addr_from_input_fifo_valid; - logic desc_addr_from_input_fifo_ready; - - logic [2:0] desc_addr_fifo_usage; - // }}} descriptor addr input to fifo - - // {{{ pending descriptor FIFO - addr_irq_t pending_descriptor_to_fifo_data; - logic pending_descriptor_to_fifo_valid; - logic pending_descriptor_to_fifo_ready; - - addr_irq_t 
pending_descriptor_from_fifo_data; - logic pending_descriptor_from_fifo_valid; - logic pending_descriptor_from_fifo_ready; - // }}} pending descriptor FIFO - - // {{{ submitter FSM - // state - submitter_e submitter_q, submitter_d; - logic [1:0] submitter_fetch_counter_q, submitter_fetch_counter_d; - // data - addr_t submitter_current_addr_q, submitter_current_addr_d; - descriptor_t submitter_current_descriptor_q, submitter_current_descriptor_d; - burst_req_t submitter_burst_req; - // register_interface master - reg_req_t submitter_master_req; - reg_rsp_t submitter_master_rsp; - // ready-valid signals - logic submitter_input_fifo_ready; - logic submitter_input_fifo_valid; - logic submitter_burst_valid_q, submitter_burst_valid_d; - logic submitter_pending_fifo_valid_q, submitter_pending_fifo_valid_d; - // }}} submitter FSM - - // {{{ instantiated modules - logic completion_counter_decrement; - logic completion_counter_has_items; - // }}} instantiated modules - - // {{{ feedback FSM - // state - feedback_fsm_e feedback_fsm_q, feedback_fsm_d; - // data - addr_irq_t feedback_addr_irq_q, feedback_addr_irq_d; - logic feedback_irq_q, feedback_irq_d; - // register_interface master - reg_req_t feedback_master_req_q, feedback_master_req_d; - reg_rsp_t feedback_master_rsp; - // ready-valid signals - logic feedback_pending_descriptor_ready_q, feedback_pending_descriptor_ready_d; - logic feedback_counter_ready_q, feedback_counter_ready_d; - // }}} feedback FSM - - // }}} signal declarations - - // {{{ combinatorial processes - - // {{{ descriptor addr input to fifo - assign desc_addr_to_input_fifo_data = register_file_to_hw.desc_addr.q; - // }}} descriptor addr input to fifo - - // {{{ submitter FSM - assign desc_addr_from_input_fifo_ready = submitter_q == SubmitterIdle; - assign submitter_input_fifo_valid = desc_addr_from_input_fifo_valid; - - assign pending_descriptor_to_fifo_valid = submitter_pending_fifo_valid_q; - assign submitter_master_req.addr = submitter_current_addr_q 
+ (submitter_fetch_counter_q << 3); - assign submitter_master_req.write = '0; - assign submitter_master_req.wdata = '0; - assign submitter_master_req.wstrb = '0; - assign submitter_master_req.valid = submitter_q == SubmitterFetchDescriptor; - - assign pending_descriptor_to_fifo_data.do_irq = submitter_current_descriptor_q.flags[0]; - assign pending_descriptor_to_fifo_data.descriptor_addr = submitter_current_addr_q; - - always_comb begin : proc_submitter_burst_req - submitter_burst_req = '0; - - submitter_burst_req.length = submitter_current_descriptor_q.length; - submitter_burst_req.src_addr = submitter_current_descriptor_q.src_addr; - submitter_burst_req.dst_addr = submitter_current_descriptor_q.dest_addr; - - // Current backend only supports one ID - submitter_burst_req.opt.axi_id = submitter_current_descriptor_q.flags[23:16]; - submitter_burst_req.opt.src.burst = submitter_current_descriptor_q.flags[2:1]; - submitter_burst_req.opt.src.cache = submitter_current_descriptor_q.flags[11:8]; - // AXI4 does not support locked transactions, use atomics - submitter_burst_req.opt.src.lock = '0; - // unpriviledged, secure, data access - submitter_burst_req.opt.src.prot = '0; - // not participating in qos - submitter_burst_req.opt.src.qos = '0; - // only one region - submitter_burst_req.opt.src.region = '0; - submitter_burst_req.opt.dst.burst = submitter_current_descriptor_q.flags[4:3]; - submitter_burst_req.opt.dst.cache = submitter_current_descriptor_q.flags[15:12]; - // AXI4 does not support locked transactions, use atomics - submitter_burst_req.opt.dst.lock = '0; - // unpriviledged, secure, data access - submitter_burst_req.opt.dst.prot = '0; - // not participating in qos - submitter_burst_req.opt.dst.qos = '0; - // only one region in system - submitter_burst_req.opt.dst.region = '0; - // ensure coupled AW to avoid deadlocks - submitter_burst_req.opt.beo.decouple_aw = '0; - submitter_burst_req.opt.beo.decouple_rw = submitter_current_descriptor_q.flags[5]; - // this 
frontend currently only supports completely debursting - submitter_burst_req.opt.beo.src_max_llen = '0; - // this frontend currently only supports completely debursting - submitter_burst_req.opt.beo.dst_max_llen = '0; - submitter_burst_req.opt.beo.src_reduce_len = submitter_current_descriptor_q.flags[7]; - submitter_burst_req.opt.beo.dst_reduce_len = submitter_current_descriptor_q.flags[7]; - // serialization no longer supported - // submitter_burst_req.serialize = submitter_current_descriptor_q.flags[6]; +/// Specifies how many unsent AWs/Ws are allowed +localparam int unsigned MaxAWWPending = BackendDepth; + +typedef logic [AddrWidth-1:0] addr_t; + +/// Descriptor layout +typedef struct packed { + /// Flags for this request. Currently, the following are defined: + /// bit 0 set to trigger an irq on completion, unset to not be notified + /// bits 2:1 burst type for source, fixed: 00, incr: 01, wrap: 10 + /// bits 4:3 burst type for destination, fixed: 00, incr: 01, wrap: 10 + /// for a description of these modes, check AXI-Pulp documentation + /// bit 5 set to decouple reads and writes in the backend + /// bit 6 set to serialize requests. 
Not setting might violate AXI spec + /// bit 7 set to deburst (each burst is split into own transfer) + /// for a more thorough description, refer to the iDMA backend documentation + /// bits 11:8 Bitfield for AXI cache attributes for the source + /// bits 15:12 Bitfield for AXI cache attributes for the destination + /// bits of the bitfield (refer to AXI-Pulp for a description): + /// bit 0: cache bufferable + /// bit 1: cache modifiable + /// bit 2: cache read alloc + /// bit 3: cache write alloc + /// bits 23:16 AXI ID used for the transfer + /// bits 31:24 unused/reserved + logic [31:0] flags; + /// length of request in bytes + logic [31:0] length; + /// address of next descriptor, 0xFFFF_FFFF_FFFF_FFFF for last descriptor in chain + addr_t next; + /// source address to copy from + addr_t src_addr; + /// destination address to copy to + addr_t dest_addr; +} descriptor_t; + +typedef logic [$clog2(NSpeculation + 1)-1:0] flush_t; + +idma_req_t idma_req; +logic idma_req_valid; +logic idma_req_ready; +logic idma_req_inflight; +logic gated_r_valid, gated_r_ready; + +logic do_irq; +logic do_irq_valid; +logic do_irq_ready; + +addr_t queued_addr; +logic queued_addr_valid; +logic queued_addr_ready; +addr_t next_addr_from_desc; +logic next_addr_from_desc_valid; +logic ar_busy; +addr_t feedback_addr; +logic feedback_addr_valid; +logic feedback_addr_ready; +addr_t next_wb_addr; +logic next_wb_addr_valid; +logic next_wb_addr_ready; + +`define MAX(a, b) (a) > (b) ? 
a : b + +localparam int unsigned PendingFifoDepthBits = `MAX($clog2(PendingFifoDepth), 1); + +logic [PendingFifoDepthBits-1:0] idma_req_used; +logic [PendingFifoDepthBits:0] idma_req_available; + +logic [1:0] ws_per_writeback; +// one bit extra for the 32 bit case +logic [$clog2(MaxAWWPending):0] w_counter_q, w_counter_d; +logic aw_tx; +logic w_tx; + +flush_t n_requests_to_flush; +logic n_requests_to_flush_valid; + +addr_t input_addr; +logic input_addr_valid, input_addr_ready; + +logic do_irq_out; + +idma_desc64_reg_pkg::idma_desc64_reg2hw_t reg2hw; +idma_desc64_reg_pkg::idma_desc64_hw2reg_t hw2reg; + +addr_t aw_addr; + +always_comb begin : proc_available + idma_req_available = PendingFifoDepth - idma_req_used - idma_req_inflight; + if (idma_req_used == '0) begin + if (idma_req_ready) begin + idma_req_available = PendingFifoDepth - idma_req_inflight; + end else begin + idma_req_available = '0; + end + end else if (idma_req_used == PendingFifoDepth && idma_req_inflight) begin + idma_req_available = '0; + end +end + +always_comb begin : proc_aw + master_req_o.aw = '0; + master_req_o.aw.id = axi_aw_id_i; + master_req_o.aw.addr = aw_addr; + master_req_o.aw.size = (DataWidth == 32) ? 3'b010 : 3'b011; + master_req_o.aw.len = (DataWidth == 32) ? 
'b1 : 'b0; +end + +assign master_req_o.w_valid = w_counter_q > 0; +assign aw_tx = master_req_o.aw_valid && master_rsp_i.aw_ready; +assign w_tx = master_req_o.w_valid && master_rsp_i.w_ready; + +always_comb begin : proc_w_counter + w_counter_d = w_counter_q; + if (aw_tx && w_tx) begin + w_counter_d = w_counter_q + ws_per_writeback - 'b1; + end else if (aw_tx) begin + w_counter_d = w_counter_q + ws_per_writeback; + end else if (w_tx) begin + w_counter_d = w_counter_q - 'b1; + end +end + +if (DataWidth == 32) begin : gen_aw_w_chan_32 + logic w_is_last_q, w_is_last_d; + assign ws_per_writeback = 2'd2; + // writeback is 64 bits, so toggle last after sending one word + always_comb begin : proc_is_last + w_is_last_d = w_is_last_q; + if (master_req_o.w_valid && master_rsp_i.w_ready) begin + w_is_last_d = !w_is_last_q; + end + end + + always_comb begin : proc_w + master_req_o.w = '0; + master_req_o.w.data = '1; + master_req_o.w.strb = 4'hf; + master_req_o.w.last = w_is_last_q; end + `FF(w_is_last_q, w_is_last_d, 1'b0) +end else begin : gen_aw_w_chan + assign ws_per_writeback = 2'd1; + always_comb begin : proc_w + master_req_o.w = '0; + master_req_o.w.data = '0; + master_req_o.w.data[63:0] = 64'hffff_ffff_ffff_ffff; + master_req_o.w.strb = 'hff; + master_req_o.w.last = 1'b1; + end +end + +assign hw2reg.status.busy.d = queued_addr_valid || + next_wb_addr_valid || + idma_req_valid_o || + master_req_o.b_ready || + master_req_o.aw_valid || + w_counter_q > 0 || + idma_busy_i || + ar_busy; + +assign hw2reg.status.busy.de = 1'b1; +assign hw2reg.status.fifo_full.d = !input_addr_ready; +assign hw2reg.status.fifo_full.de = 1'b1; + +assign input_addr = reg2hw.desc_addr.q; + +idma_desc64_reg_wrapper #( + .reg_req_t(reg_req_t), + .reg_rsp_t(reg_rsp_t) +) i_reg_wrapper ( + .clk_i, + .rst_ni, + .reg_req_i (slave_req_i), + .reg_rsp_o (slave_rsp_o), + .reg2hw_o (reg2hw), + .hw2reg_i (hw2reg), + .devmode_i (1'b0), + .input_addr_valid_o (input_addr_valid), + .input_addr_ready_i 
(input_addr_ready) +); + +if (NSpeculation == 0) begin +assign n_requests_to_flush = '0; +assign n_requests_to_flush_valid = '0; +assign master_req_o.r_ready = gated_r_ready; +assign gated_r_valid = master_rsp_i.r_valid; +idma_desc64_ar_gen #( + .DataWidth (DataWidth), + .descriptor_t (descriptor_t), + .axi_ar_chan_t(axi_ar_chan_t), + .axi_id_t (logic [AxiIdWidth-1:0]), + .usage_t (logic [$bits(idma_req_available)-1:0]), + .addr_t (addr_t) +) i_ar_gen ( + .clk_i, + .rst_ni, + .axi_ar_chan_o (master_req_o.ar), + .axi_ar_chan_valid_o (master_req_o.ar_valid), + .axi_ar_chan_ready_i (master_rsp_i.ar_ready), + .axi_ar_id_i, + .queued_address_i (queued_addr), + .queued_address_valid_i (queued_addr_valid), + .queued_address_ready_o (queued_addr_ready), + .next_address_from_descriptor_i (next_addr_from_desc), + .next_address_from_descriptor_valid_i(next_addr_from_desc_valid), + .idma_req_available_slots_i (idma_req_available), + .feedback_addr_o (feedback_addr), + .feedback_addr_valid_o (feedback_addr_valid), + .busy_o (ar_busy) +); +end else begin +idma_desc64_ar_gen_prefetch #( + .DataWidth (DataWidth), + .NSpeculation (NSpeculation), + .descriptor_t (descriptor_t), + .axi_ar_chan_t(axi_ar_chan_t), + .axi_id_t (logic [AxiIdWidth-1:0]), + .usage_t (logic [$bits(idma_req_available)-1:0]), + .addr_t (addr_t), + .flush_t (flush_t) +) i_ar_gen ( + .clk_i, + .rst_ni, + .axi_ar_chan_o (master_req_o.ar), + .axi_ar_chan_valid_o (master_req_o.ar_valid), + .axi_ar_chan_ready_i (master_rsp_i.ar_ready), + .axi_ar_id_i, + .queued_address_i (queued_addr), + .queued_address_valid_i (queued_addr_valid), + .queued_address_ready_o (queued_addr_ready), + .next_address_from_descriptor_i (next_addr_from_desc), + .next_address_from_descriptor_valid_i(next_addr_from_desc_valid), + .idma_req_available_slots_i (idma_req_available), + .n_requests_to_flush_o (n_requests_to_flush), + .n_requests_to_flush_valid_o (n_requests_to_flush_valid), + .feedback_addr_o (feedback_addr), + 
.feedback_addr_valid_o (feedback_addr_valid), + .busy_o (ar_busy) +); + +idma_desc64_reader_gater #( + .flush_t(flush_t) +) i_reader_gater ( + .clk_i, + .rst_ni, + .n_to_flush_i (n_requests_to_flush), + .n_to_flush_valid_i(n_requests_to_flush_valid), + .r_valid_i (master_rsp_i.r_valid), + .r_ready_o (master_req_o.r_ready), + .r_valid_o (gated_r_valid), + .r_ready_i (gated_r_ready), + .r_last_i (master_rsp_i.r.last) +); + + +end + +idma_desc64_reader #( + .AddrWidth (AddrWidth), + .DataWidth (DataWidth), + .idma_req_t (idma_req_t), + .descriptor_t(descriptor_t), + .axi_r_chan_t(axi_r_chan_t) +) i_reader ( + .clk_i, + .rst_ni, + .r_chan_i (master_rsp_i.r), + .r_chan_valid_i (gated_r_valid), + .r_chan_ready_o (gated_r_ready), + .idma_req_o (idma_req), + .idma_req_valid_o (idma_req_valid), + .idma_req_ready_i (idma_req_ready), + .next_descriptor_addr_o (next_addr_from_desc), + .next_descriptor_addr_valid_o(next_addr_from_desc_valid), + .do_irq_o (do_irq), + .do_irq_valid_o (do_irq_valid), + .idma_req_inflight_o (idma_req_inflight) +); + +stream_fifo #( + .FALL_THROUGH (1'b1), + .DEPTH (InputFifoDepth), + .T (addr_t) +) i_input_addr_fifo ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(1'b0), + .usage_o (/* unconnected */), + .data_i (input_addr), + .valid_i (input_addr_valid), + .ready_o (input_addr_ready), + .data_o (queued_addr), + .valid_o (queued_addr_valid), + .ready_i (queued_addr_ready) +); + +stream_fifo #( + .FALL_THROUGH (1'b1), + .DEPTH (PendingFifoDepth + BackendDepth), + .T (addr_t) +) i_pending_addr_fifo ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(1'b0), + .usage_o (/* unconnected */), + .data_i (feedback_addr), + .valid_i (feedback_addr_valid), + .ready_o (feedback_addr_ready), + .data_o (next_wb_addr), + .valid_o (next_wb_addr_valid), + .ready_i (next_wb_addr_ready && idma_rsp_valid_i) +); + +stream_fifo #( + .FALL_THROUGH (1'b0), + .DEPTH (PendingFifoDepth), + .T (idma_req_t) +) i_idma_request_fifo ( + .clk_i, + .rst_ni, + .flush_i 
(1'b0), + .testmode_i(1'b0), + .usage_o (idma_req_used), + .data_i (idma_req), + .valid_i (idma_req_valid), + .ready_o (idma_req_ready), + .data_o (idma_req_o), + .valid_o (idma_req_valid_o), + .ready_i (idma_req_ready_i) +); + +stream_fifo #( + .FALL_THROUGH (1'b0), + .DEPTH (PendingFifoDepth + MaxAWWPending + BackendDepth), + .T (logic) +) i_irq_fifo ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(1'b0), + .usage_o (/* unconnected */), + .data_i (do_irq), + .valid_i (do_irq_valid), + .ready_o (do_irq_ready), + .data_o (do_irq_out), + .valid_o (master_req_o.b_ready), + .ready_i (master_rsp_i.b_valid) +); + +stream_fifo #( + .FALL_THROUGH (1'b0), + .DEPTH (MaxAWWPending), + .T (addr_t) +) i_aw_addrs ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(1'b0), + .usage_o (/* unconnected */), + .data_i (next_wb_addr), + .valid_i (next_wb_addr_valid && idma_rsp_valid_i), + .ready_o (next_wb_addr_ready), + .data_o (aw_addr), + .valid_o (master_req_o.aw_valid), + .ready_i (master_rsp_i.aw_ready) +); + +`FF(w_counter_q, w_counter_d, '0); + + +assign idma_rsp_ready_o = next_wb_addr_ready && next_wb_addr_valid; +assign irq_o = do_irq_out && master_req_o.b_ready && master_rsp_i.b_valid; - always_comb begin : submitter_fsm - submitter_d = submitter_q; - submitter_current_addr_d = submitter_current_addr_q; - submitter_current_descriptor_d = submitter_current_descriptor_q; - submitter_burst_valid_d = submitter_burst_valid_q; - submitter_pending_fifo_valid_d = submitter_pending_fifo_valid_q; - submitter_fetch_counter_d = submitter_fetch_counter_q; - - unique case (submitter_q) - SubmitterIdle: begin - if (submitter_input_fifo_valid) begin - submitter_current_addr_d = desc_addr_from_input_fifo_data; - - submitter_d = SubmitterFetchDescriptor; - submitter_fetch_counter_d = '0; - end - end - SubmitterFetchDescriptor: begin - if (submitter_master_rsp.ready) begin - submitter_fetch_counter_d = submitter_fetch_counter_q + 1; - unique case (submitter_fetch_counter_q) - 
2'b00: begin - submitter_current_descriptor_d.flags = submitter_master_rsp.rdata[63:32]; - submitter_current_descriptor_d.length = submitter_master_rsp.rdata[31:0]; - end - 2'b01: begin - submitter_current_descriptor_d.next = submitter_master_rsp.rdata; - end - 2'b10: begin - submitter_current_descriptor_d.src_addr = submitter_master_rsp.rdata; - end - 2'b11: begin - submitter_current_descriptor_d.dest_addr = submitter_master_rsp.rdata; - submitter_fetch_counter_d = '0; - submitter_d = SubmitterSendToBE; - submitter_burst_valid_d = 1'b1; - submitter_pending_fifo_valid_d = 1'b1; - end - default: begin - submitter_d = submitter_e'('X); - submitter_current_addr_d = 'X; - submitter_current_descriptor_d = 'X; - submitter_burst_valid_d = 'X; - submitter_pending_fifo_valid_d = 'X; - submitter_fetch_counter_d = 'X; - end - endcase - end - end - SubmitterSendToBE: begin - // Unset valid once the ready signal came. We can't use !ready, - // as we might be waiting on the other signal, while the - // first ready goes low again, marking our signal erroniously as valid. 
- if (pending_descriptor_to_fifo_ready) submitter_pending_fifo_valid_d = 1'b0; - if (dma_be_ready_i) submitter_burst_valid_d = 1'b0; - - if ((submitter_burst_valid_q == 1'b0 || dma_be_ready_i == 1'b1) && - (submitter_pending_fifo_valid_q == 1'b0 || pending_descriptor_to_fifo_ready == 1'b1)) begin - - submitter_current_descriptor_d = '0; - - if (submitter_current_descriptor_q.next == AddressSentinel) begin - submitter_d = SubmitterIdle; - end else begin - submitter_d = SubmitterFetchDescriptor; - submitter_current_addr_d = submitter_current_descriptor_q.next; - submitter_fetch_counter_d = '0; - end - end - end - default: begin - submitter_d = submitter_e'('X); - submitter_current_addr_d = 'X; - submitter_current_descriptor_d = 'X; - submitter_burst_valid_d = 'X; - submitter_pending_fifo_valid_d = 'X; - submitter_fetch_counter_d = 'X; - end - endcase - end : submitter_fsm - // }}} submitter FSM - - // {{{ feedback FSM - assign pending_descriptor_from_fifo_ready = feedback_pending_descriptor_ready_q; - assign completion_counter_decrement = feedback_counter_ready_q; - - always_comb begin : feedback_fsm - feedback_fsm_d = feedback_fsm_q; - feedback_addr_irq_d = feedback_addr_irq_q; - feedback_master_req_d = feedback_master_req_q; - feedback_irq_d = '0; - feedback_pending_descriptor_ready_d = '0; - feedback_counter_ready_d = '0; - - unique case (feedback_fsm_q) - FeedbackIdle: begin - feedback_pending_descriptor_ready_d = 1'b1; - if (pending_descriptor_from_fifo_valid) begin - feedback_addr_irq_d = pending_descriptor_from_fifo_data; - - feedback_fsm_d = FeedbackWaitingOnBackend; - end - end - FeedbackWaitingOnBackend: begin - if (completion_counter_has_items) begin - feedback_counter_ready_d = 1'b1; - feedback_fsm_d = FeedbackUpdateMemory; - end - end - FeedbackUpdateMemory: begin - if (feedback_master_req_q.valid == '0) begin - // overwrite the flags and length fields with all 1s - // to mark it as completed - feedback_master_req_d.addr = 
feedback_addr_irq_q.descriptor_addr; - feedback_master_req_d.write = 1'b1; - feedback_master_req_d.wdata = ~'0; - feedback_master_req_d.wstrb = ~'0; - feedback_master_req_d.valid = 1'b1; - end else if (feedback_master_rsp.ready == 1'b1) begin - feedback_master_req_d.write = '0; - feedback_master_req_d.valid = '0; - if (feedback_addr_irq_q.do_irq) begin - feedback_fsm_d = FeedbackRaiseIRQ; - end else begin - feedback_fsm_d = FeedbackIdle; - end - end - end - FeedbackRaiseIRQ: begin - feedback_irq_d = 1'b1; - feedback_fsm_d = FeedbackIdle; - end - default: begin - feedback_fsm_d = feedback_fsm_e'('X); - feedback_addr_irq_d = 'X; - feedback_master_req_d = 'X; - feedback_irq_d = 'X; - feedback_pending_descriptor_ready_d = 'X; - feedback_counter_ready_d = 'X; - end - endcase - end : feedback_fsm - // }}} feedback FSM - - // {{{ status update - assign register_file_to_reg.status.busy.d = (submitter_q != SubmitterIdle || - feedback_fsm_q != FeedbackIdle || - !dma_be_idle_i); - assign register_file_to_reg.status.busy.de = 1'b1; - - // leave a bit of wiggle room for the previous registers to catch up - assign register_file_to_reg.status.fifo_full.d = desc_addr_fifo_usage > 6; - assign register_file_to_reg.status.fifo_full.de = 1'b1; - // }}} status update - - // }}} combinatorial processes - - // {{{ instantiated modules - - // {{{ descriptor addr input to fifo - stream_fifo #( - .DATA_WIDTH (64) , - .DEPTH (InputFifoDepth) - ) i_descriptor_input_fifo ( - .clk_i, - .rst_ni, - .flush_i (1'b0) , - .testmode_i (1'b0) , - .usage_o (desc_addr_fifo_usage) , - // input port - .data_i (desc_addr_to_input_fifo_data) , - .valid_i (desc_addr_to_input_fifo_valid) , - .ready_o (desc_addr_to_input_fifo_ready) , - // output port - .data_o (desc_addr_from_input_fifo_data) , - .valid_o (desc_addr_from_input_fifo_valid), - .ready_i (desc_addr_from_input_fifo_ready) - ); - idma_desc64_reg_wrapper #( - .reg_req_t (reg_req_t), - .reg_rsp_t (reg_rsp_t) - ) i_register_file_controller ( - .clk_i 
(clk_i) , - .rst_ni (rst_ni) , - .reg_req_i (slave_req_i) , - .reg_rsp_o (slave_rsp_o) , - .reg2hw_o (register_file_to_hw) , - .hw2reg_i (register_file_to_reg) , - .devmode_i (1'b1) , - .descriptor_fifo_ready_i(desc_addr_to_input_fifo_ready), - .descriptor_fifo_valid_o(desc_addr_to_input_fifo_valid) - ); - // }}} descriptor addr input to fifo - - // {{{ pending descriptor FIFO - stream_fifo #( - .T (addr_irq_t) , - .DEPTH (PendingFifoDepth) - ) i_pending_descriptor_fifo ( - .clk_i, - .rst_ni, - .flush_i (1'b0) , - .testmode_i (1'b0) , - .usage_o (/* don't care for now */) , - .data_i (pending_descriptor_to_fifo_data) , - .valid_i (pending_descriptor_to_fifo_valid) , - .ready_o (pending_descriptor_to_fifo_ready) , - .data_o (pending_descriptor_from_fifo_data) , - .valid_o (pending_descriptor_from_fifo_valid), - .ready_i (pending_descriptor_from_fifo_ready) - ); - // }}} pending descriptor FIFO - - // {{{ counter module - idma_desc64_shared_counter #( - .CounterWidth(TxDoneBufferWidth) - ) i_completion_counter ( - .clk_i (clk_i) , - .rst_ni (rst_ni) , - .increment_i (dma_be_tx_complete_i) , - .decrement_i (completion_counter_decrement), - .greater_than_zero_o(completion_counter_has_items) - ); - // }}} counter module - - // {{{ regbus master arbitration - reg_mux #( - .NoPorts (2) , - .AW (AddrWidth), - .DW (AddrWidth), - .req_t (reg_req_t), - .rsp_t (reg_rsp_t) - ) i_master_arbitration ( - .clk_i (clk_i) , - .rst_ni (rst_ni) , - .in_req_i ({submitter_master_req, feedback_master_req_q}), - .in_rsp_o ({submitter_master_rsp, feedback_master_rsp}) , - .out_req_o(master_req_o) , - .out_rsp_i(master_rsp_i) - ); - // }}} regbus master arbitration - - // }}} instantiated modules - - // {{{ state-holding processes - - // {{{ submitter FSM - // state - `FF(submitter_q, submitter_d, SubmitterIdle); - `FF(submitter_fetch_counter_q, submitter_fetch_counter_d, '0); - - // data - `FF(submitter_current_addr_q, submitter_current_addr_d, '0); - `FF(submitter_current_descriptor_q, 
submitter_current_descriptor_d, '{default: '0}); - - // ready-valid signals - `FF(submitter_burst_valid_q, submitter_burst_valid_d, '0); - `FF(submitter_pending_fifo_valid_q, submitter_pending_fifo_valid_d, '0); - // }}} submitter FSM - - // {{{ feedback FSM - `FF(feedback_fsm_q, feedback_fsm_d, FeedbackIdle); - - // data - `FF(feedback_addr_irq_q, feedback_addr_irq_d, '0); - `FF(feedback_irq_q, feedback_irq_d, '0); - - // register_interface master request - `FF(feedback_master_req_q, feedback_master_req_d, '{default: '0}); - - // ready-valid signals - `FF(feedback_pending_descriptor_ready_q, feedback_pending_descriptor_ready_d, '0); - `FF(feedback_counter_ready_q, feedback_counter_ready_d, '0); - // }}} feedback FSM - - // }}} state-holding processes - - // {{{ output assignments - assign dma_be_req_o = submitter_burst_req; - assign dma_be_valid_o = submitter_burst_valid_q; - assign irq_o = feedback_irq_q; - // }}} output assignments +// The three fifos for idma_req, irqs and feedback addresses must fill +// and empty in lockstep. Capacity is tested at the idma_req fifo, the +// other two ready signals are ignored. +// pragma translate_off +`ASSERT_IF(NoIrqDropped, do_irq_ready, do_irq_valid); +`ASSERT_IF(NoAddrDropped, feedback_addr_ready, feedback_addr_valid); +// pragma translate_on endmodule : idma_desc64_top diff --git a/src/include/idma/tracer.svh b/src/include/idma/tracer.svh new file mode 100644 index 00000000..d8f6740d --- /dev/null +++ b/src/include/idma/tracer.svh @@ -0,0 +1,101 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Thomas Benz + +// Macro holding all the resources for the iDMA backend tracer +`ifndef IDMA_TRACER_SVH_ +`define IDMA_TRACER_SVH_ + +// largest type to trace +`define IDMA_TRACER_MAX_TYPE_WIDTH 1024 +`define IDMA_TRACER_MAX_TYPE logic [`IDMA_TRACER_MAX_TYPE_WIDTH-1:0] + +// string assembly function +`define IDMA_TRACER_STR_ASSEMBLY(__dict, __cond) \ + if(__cond) begin \ + trace = $sformatf("%s'%s':{", trace, `"__dict`"); \ + foreach(__dict``[key]) trace = $sformatf("%s'%s': 0x%0x,", trace, key, __dict``[key]); \ + trace = $sformatf("%s},", trace); \ + end + +// helper to clear a condition +`define IDMA_TRACER_CLEAR_COND(__cond) \ + if(__cond) begin \ + __cond = ~__cond; \ + end + +// The tracer for the iDMA +`define IDMA_TRACER(__backend_inst, __out_f_name) \ +`ifndef SYNTHESIS \ +`ifndef VERILATOR \ + initial begin : inital_tracer \ + automatic bit first_iter = 1; \ + automatic integer tf; \ + automatic `IDMA_TRACER_MAX_TYPE cnst [string]; \ + automatic `IDMA_TRACER_MAX_TYPE meta [string]; \ + automatic `IDMA_TRACER_MAX_TYPE busy [string]; \ + automatic `IDMA_TRACER_MAX_TYPE axib [string]; \ + automatic string trace; \ + #0; \ + tf = $fopen(__out_f_name, "w"); \ + $display("[Tracer] Logging iDMA backend %s to %s", `"__backend_inst`", __out_f_name); \ + forever begin \ + @(posedge __backend_inst``.clk_i); \ + if(__backend_inst``.rst_ni & |__backend_inst``.busy_o) begin \ + /* Trace */ \ + trace = "{"; \ + /* Constants */ \ + cnst = '{ \ + "inst" : `"__backend_inst`", \ + "data_width" : __backend_inst``.DataWidth, \ + "addr_width" : __backend_inst``.AddrWidth, \ + "user_width" : __backend_inst``.UserWidth, \ + "axi_id_width" : __backend_inst``.AxiIdWidth, \ + "num_ax_in_flight" : __backend_inst``.NumAxInFlight, \ + "buffer_depth" : __backend_inst``.BufferDepth, \ + "tf_len_width" : __backend_inst``.TFLenWidth, \ + "mem_sys_depth" : __backend_inst``.MemSysDepth, \ + "rw_coupling_avail" : __backend_inst``.RAWCouplingAvail, 
\ + "mask_invalid_data" : __backend_inst``.MaskInvalidData, \ + "hardware_legalizer" : __backend_inst``.HardwareLegalizer, \ + "reject_zero_transfers" : __backend_inst``.RejectZeroTransfers, \ + "error_cap" : __backend_inst``.ErrorCap, \ + "print_fifo_info" : __backend_inst``.PrintFifoInfo \ + }; \ + meta = '{ \ + "time" : $time() \ + }; \ + busy = '{ \ + "buffer" : __backend_inst``.busy_o.buffer_busy, \ + "r_dp" : __backend_inst``.busy_o.r_dp_busy, \ + "w_dp" : __backend_inst``.busy_o.w_dp_busy, \ + "r_leg" : __backend_inst``.busy_o.r_leg_busy, \ + "w_leg" : __backend_inst``.busy_o.w_leg_busy, \ + "eh_fsm" : __backend_inst``.busy_o.eh_fsm_busy, \ + "eh_cnt" : __backend_inst``.busy_o.eh_cnt_busy, \ + "raw_coupler" : __backend_inst``.busy_o.raw_coupler_busy \ + }; \ + axib = '{ \ + "w_valid" : __backend_inst``.axi_req_o.w_valid, \ + "w_ready" : __backend_inst``.axi_rsp_i.w_ready, \ + "w_strb" : __backend_inst``.axi_req_o.w.strb, \ + "r_valid" : __backend_inst``.axi_rsp_i.r_valid, \ + "r_ready" : __backend_inst``.axi_req_o.r_ready \ + }; \ + /* Assembly */ \ + `IDMA_TRACER_STR_ASSEMBLY(cnst, first_iter); \ + `IDMA_TRACER_STR_ASSEMBLY(meta, 1); \ + `IDMA_TRACER_STR_ASSEMBLY(busy, 1); \ + `IDMA_TRACER_STR_ASSEMBLY(axib, 1); \ + `IDMA_TRACER_CLEAR_COND(first_iter); \ + /* Commit */ \ + $fwrite(tf, $sformatf("%s}\n", trace)); \ + end \ + end \ + end \ +`endif \ +`endif + +`endif diff --git a/src/systems/cva6_desc/dma_desc_synth.sv b/src/systems/cva6_desc/dma_desc_synth.sv new file mode 100644 index 00000000..23ee0d5c --- /dev/null +++ b/src/systems/cva6_desc/dma_desc_synth.sv @@ -0,0 +1,59 @@ +module dma_desc_synth #( + parameter int AxiAddrWidth = dma_desc_synth_pkg::AxiAddrWidth, + parameter int AxiDataWidth = dma_desc_synth_pkg::AxiDataWidth, + parameter int AxiUserWidth = dma_desc_synth_pkg::AxiUserWidth, + parameter int AxiIdWidth = dma_desc_synth_pkg::AxiIdWidth, + parameter int AxiSlvIdWidth = dma_desc_synth_pkg::AxiSlvIdWidth, + parameter int NSpeculation = 
dma_desc_synth_pkg::NSpeculation, + parameter int PendingFifoDepth = dma_desc_synth_pkg::PendingFifoDepth, + parameter int InputFifoDepth = dma_desc_synth_pkg::InputFifoDepth, + parameter type mst_aw_chan_t = dma_desc_synth_pkg::mst_aw_chan_t, + parameter type mst_w_chan_t = dma_desc_synth_pkg::mst_w_chan_t, + parameter type mst_b_chan_t = dma_desc_synth_pkg::mst_b_chan_t, + parameter type mst_ar_chan_t = dma_desc_synth_pkg::mst_ar_chan_t, + parameter type mst_r_chan_t = dma_desc_synth_pkg::mst_r_chan_t, + parameter type axi_mst_req_t = dma_desc_synth_pkg::axi_mst_req_t, + parameter type axi_mst_rsp_t = dma_desc_synth_pkg::axi_mst_rsp_t, + parameter type axi_slv_req_t = dma_desc_synth_pkg::axi_slv_req_t, + parameter type axi_slv_rsp_t = dma_desc_synth_pkg::axi_slv_rsp_t +)( + input logic clk_i, + input logic rst_ni, + input logic testmode_i, + output logic irq_o, + output axi_mst_req_t axi_master_req_o, + input axi_mst_rsp_t axi_master_rsp_i, + input axi_slv_req_t axi_slave_req_i, + output axi_slv_rsp_t axi_slave_rsp_o +); + +dma_desc_wrap #( + .AxiAddrWidth (AxiAddrWidth ), + .AxiDataWidth (AxiDataWidth ), + .AxiUserWidth (AxiUserWidth ), + .AxiIdWidth (AxiIdWidth ), + .AxiSlvIdWidth (AxiSlvIdWidth), + .NSpeculation (NSpeculation), + .PendingFifoDepth(PendingFifoDepth), + .InputFifoDepth(InputFifoDepth), + .mst_aw_chan_t (mst_aw_chan_t), + .mst_w_chan_t (mst_w_chan_t ), + .mst_b_chan_t (mst_b_chan_t ), + .mst_ar_chan_t (mst_ar_chan_t), + .mst_r_chan_t (mst_r_chan_t ), + .axi_mst_req_t (axi_mst_req_t), + .axi_mst_rsp_t (axi_mst_rsp_t), + .axi_slv_req_t (axi_slv_req_t), + .axi_slv_rsp_t (axi_slv_rsp_t) +) i_dma_desc_wrap ( + .clk_i, + .rst_ni, + .testmode_i, + .irq_o, + .axi_master_req_o, + .axi_master_rsp_i, + .axi_slave_req_i, + .axi_slave_rsp_o +); + +endmodule diff --git a/src/systems/cva6_desc/dma_desc_synth_pkg.sv b/src/systems/cva6_desc/dma_desc_synth_pkg.sv new file mode 100644 index 00000000..f5778e9f --- /dev/null +++ 
b/src/systems/cva6_desc/dma_desc_synth_pkg.sv @@ -0,0 +1,22 @@ +`include "axi/typedef.svh" + +package dma_desc_synth_pkg; + `AXI_TYPEDEF_ALL(axi, logic [63:0], logic [2:0], logic [63:0], logic [7:0], logic) + parameter int AxiAddrWidth = 64; + parameter int AxiDataWidth = 64; + parameter int AxiUserWidth = 1; + parameter int AxiIdWidth = 3; + parameter int AxiSlvIdWidth = 3; + parameter int NSpeculation = 4; + parameter int PendingFifoDepth = 4; + parameter int InputFifoDepth = 1; + parameter type mst_aw_chan_t = axi_aw_chan_t; // AW Channel Type, master port + parameter type mst_w_chan_t = axi_w_chan_t; // W Channel Type, all ports + parameter type mst_b_chan_t = axi_b_chan_t; // B Channel Type, master port + parameter type mst_ar_chan_t = axi_ar_chan_t; // AR Channel Type, master port + parameter type mst_r_chan_t = axi_r_chan_t; // R Channel Type, master port + parameter type axi_mst_req_t = axi_req_t; + parameter type axi_mst_rsp_t = axi_resp_t; + parameter type axi_slv_req_t = axi_req_t; + parameter type axi_slv_rsp_t = axi_resp_t; +endpackage diff --git a/src/systems/cva6_desc/dma_desc_wrap.sv b/src/systems/cva6_desc/dma_desc_wrap.sv index 7d3b86cc..7580fe1b 100644 --- a/src/systems/cva6_desc/dma_desc_wrap.sv +++ b/src/systems/cva6_desc/dma_desc_wrap.sv @@ -7,25 +7,29 @@ `include "axi/assign.svh" `include "axi/typedef.svh" `include "idma/typedef.svh" +`include "idma/tracer.svh" `include "register_interface/typedef.svh" `include "common_cells/registers.svh" /// Wrapper for the iDMA module dma_desc_wrap #( - parameter int AxiAddrWidth = 64, - parameter int AxiDataWidth = 64, - parameter int AxiUserWidth = -1, - parameter int AxiIdWidth = -1, - parameter int AxiSlvIdWidth = -1, - parameter type mst_aw_chan_t = logic, // AW Channel Type, master port - parameter type mst_w_chan_t = logic, // W Channel Type, all ports - parameter type mst_b_chan_t = logic, // B Channel Type, master port - parameter type mst_ar_chan_t = logic, // AR Channel Type, master port - 
parameter type mst_r_chan_t = logic, // R Channel Type, master port - parameter type axi_mst_req_t = logic, - parameter type axi_mst_rsp_t = logic, - parameter type axi_slv_req_t = logic, - parameter type axi_slv_rsp_t = logic + parameter int AxiAddrWidth = 64, + parameter int AxiDataWidth = 64, + parameter int AxiUserWidth = -1, + parameter int AxiIdWidth = -1, + parameter int AxiSlvIdWidth = -1, + parameter int NSpeculation = 4, + parameter int PendingFifoDepth = 4, + parameter int InputFifoDepth = 1, + parameter type mst_aw_chan_t = logic, // AW Channel Type, master port + parameter type mst_w_chan_t = logic, // W Channel Type, all ports + parameter type mst_b_chan_t = logic, // B Channel Type, master port + parameter type mst_ar_chan_t = logic, // AR Channel Type, master port + parameter type mst_r_chan_t = logic, // R Channel Type, master port + parameter type axi_mst_req_t = logic, + parameter type axi_mst_rsp_t = logic, + parameter type axi_slv_req_t = logic, + parameter type axi_slv_rsp_t = logic ) ( input logic clk_i, input logic rst_ni, @@ -45,6 +49,9 @@ module dma_desc_wrap #( // has one less bit for the mux not to error typedef logic [AxiIdWidth-2:0] post_mux_id_t; + localparam int unsigned NumAxInFlight = NSpeculation < 3 ? 
3 : NSpeculation; + localparam int unsigned BufferDepth = 3; + axi_slv_req_t axi_slv_req; axi_slv_rsp_t axi_slv_rsp; @@ -55,45 +62,59 @@ module dma_desc_wrap #( dma_axi_mst_post_mux_resp_t axi_be_mst_rsp; `REG_BUS_TYPEDEF_ALL(dma_reg, addr_t, data_t, strb_t) - dma_reg_req_t dma_reg_mst_req; - dma_reg_rsp_t dma_reg_mst_rsp; dma_reg_req_t dma_reg_slv_req; dma_reg_rsp_t dma_reg_slv_rsp; // iDMA struct definitions localparam int unsigned TFLenWidth = 32; typedef logic [TFLenWidth-1:0] tf_len_t; - typedef logic [RepWidth-1:0] reps_t; - typedef logic [StrideWidth-1:0] strides_t; // iDMA request / response types `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, post_mux_id_t, addr_t, tf_len_t) `IDMA_TYPEDEF_FULL_RSP_T(idma_rsp_t, addr_t) - burst_req_t dma_be_req; - logic dma_be_tx_complete; - logic dma_be_valid; - logic dma_be_ready; + idma_req_t idma_req; + logic idma_req_valid; + logic idma_req_ready; + + idma_rsp_t idma_rsp; + logic idma_rsp_valid; + logic idma_rsp_ready; idma_pkg::idma_busy_t idma_busy; idma_desc64_top #( - .AddrWidth (AxiAddrWidth) , - .burst_req_t(burst_req_t) , - .reg_req_t (dma_reg_req_t), - .reg_rsp_t (dma_reg_rsp_t) + .AddrWidth ( AxiAddrWidth ), + .DataWidth ( AxiDataWidth ), + .AxiIdWidth ( AxiIdWidth - 1 ), + .idma_req_t ( idma_req_t ), + .idma_rsp_t ( idma_rsp_t ), + .axi_req_t ( dma_axi_mst_post_mux_req_t ), + .axi_rsp_t ( dma_axi_mst_post_mux_resp_t ), + .axi_ar_chan_t ( dma_axi_mst_post_mux_ar_chan_t ), + .axi_r_chan_t ( dma_axi_mst_post_mux_r_chan_t ), + .reg_req_t ( dma_reg_req_t ), + .reg_rsp_t ( dma_reg_rsp_t ), + .InputFifoDepth ( InputFifoDepth ), + .PendingFifoDepth ( PendingFifoDepth ), + .BackendDepth ( NumAxInFlight + BufferDepth ), + .NSpeculation ( NSpeculation ) ) i_dma_desc64 ( .clk_i, .rst_ni, - .master_req_o ( dma_reg_mst_req ), - .master_rsp_i ( dma_reg_mst_rsp ), - .slave_req_i ( dma_reg_slv_req ), - .slave_rsp_o ( dma_reg_slv_rsp ), - .dma_be_tx_complete_i ( dma_be_tx_complete ), - .dma_be_idle_i ( ~|idma_busy ), - .dma_be_valid_o ( 
dma_be_valid ), - .dma_be_ready_i ( dma_be_ready ), - .dma_be_req_o ( dma_be_req ), - .irq_o ( irq_o ) + .master_req_o ( axi_fe_mst_req ), + .master_rsp_i ( axi_fe_mst_rsp ), + .axi_ar_id_i ( '1 ), + .axi_aw_id_i ( '1 ), + .slave_req_i ( dma_reg_slv_req ), + .slave_rsp_o ( dma_reg_slv_rsp ), + .idma_req_o ( idma_req ), + .idma_req_valid_o ( idma_req_valid ), + .idma_req_ready_i ( idma_req_ready ), + .idma_rsp_i ( idma_rsp ), + .idma_rsp_valid_i ( idma_rsp_valid ), + .idma_rsp_ready_o ( idma_rsp_ready ), + .idma_busy_i ( |idma_busy ), + .irq_o ( irq_o ) ); idma_backend #( @@ -101,8 +122,8 @@ module dma_desc_wrap #( .AddrWidth ( AxiAddrWidth ), .UserWidth ( AxiUserWidth ), .AxiIdWidth ( AxiIdWidth-1 ), - .NumAxInFlight ( 2 ), - .BufferDepth ( 3 ), + .NumAxInFlight ( NumAxInFlight ), + .BufferDepth ( BufferDepth ), .TFLenWidth ( TFLenWidth ), .RAWCouplingAvail ( 1'b1 ), .MaskInvalidData ( 1'b1 ), @@ -114,30 +135,113 @@ module dma_desc_wrap #( .idma_rsp_t ( idma_rsp_t ), .idma_eh_req_t ( idma_pkg::idma_eh_req_t ), .idma_busy_t ( idma_pkg::idma_busy_t ), - .axi_req_t ( axi_slv_req_t ), - .axi_rsp_t ( axi_slv_resp_t ) + .axi_req_t ( dma_axi_mst_post_mux_req_t ), + .axi_rsp_t ( dma_axi_mst_post_mux_resp_t ) ) i_idma_backend ( .clk_i, .rst_ni, - .testmode_i ( testmode_i ), + .testmode_i ( testmode_i ), - .idma_req_i ( dma_be_req ), - .req_valid_i ( dma_be_valid ), - .req_ready_o ( dma_be_ready ), + .idma_req_i ( idma_req ), + .req_valid_i ( idma_req_valid ), + .req_ready_o ( idma_req_ready ), - .idma_rsp_o ( /*NOT CONNECTED*/ ), - .rsp_valid_o ( dma_be_tx_complete ), - .rsp_ready_i ( 1'b1 ), + .idma_rsp_o ( idma_rsp ), + .rsp_valid_o ( idma_rsp_valid ), + .rsp_ready_i ( idma_rsp_ready ), - .idma_eh_req_i ( '0 ), // No error handling - .eh_req_valid_i( 1'b1 ), - .eh_req_ready_o( /*NOT CONNECTED*/ ), + .idma_eh_req_i ( '0 ), // No error handling + .eh_req_valid_i( 1'b1 ), + .eh_req_ready_o( /*NOT CONNECTED*/ ), - .axi_req_o ( axi_be_mst_req ), - .axi_rsp_i ( axi_be_mst_rsp 
), - .busy_o ( idma_busy ) + .axi_req_o ( axi_be_mst_req ), + .axi_rsp_i ( axi_be_mst_rsp ), + .busy_o ( idma_busy ) ); + // pragma translate_off + string trace_file; + initial begin + void'($value$plusargs("trace_file=%s", trace_file)); + end + `ifndef SYNTHESIS + `ifndef VERILATOR + initial begin : inital_tracer + automatic bit first_iter = 1; + automatic integer tf; + automatic `IDMA_TRACER_MAX_TYPE cnst [string]; + automatic `IDMA_TRACER_MAX_TYPE meta [string]; + automatic `IDMA_TRACER_MAX_TYPE busy [string]; + automatic `IDMA_TRACER_MAX_TYPE axib [string]; + automatic string trace; + #0; + tf = $fopen(trace_file, "w"); + $display("[Tracer] Logging iDMA backend %s to %s", "i_idma_backend", trace_file); + forever begin + @(posedge i_idma_backend.clk_i); + if (i_idma_backend.rst_ni & |i_idma_backend.busy_o) begin + break; + end + end + forever begin + @(posedge i_idma_backend.clk_i); + /* Trace */ + trace = "{"; + /* Constants */ + cnst = '{ + "inst" : "i_idma_backend", + "data_width" : i_idma_backend.DataWidth, + "addr_width" : i_idma_backend.AddrWidth, + "user_width" : i_idma_backend.UserWidth, + "axi_id_width" : i_idma_backend.AxiIdWidth, + "num_ax_in_flight" : i_idma_backend.NumAxInFlight, + "buffer_depth" : i_idma_backend.BufferDepth, + "tf_len_width" : i_idma_backend.TFLenWidth, + "mem_sys_depth" : i_idma_backend.MemSysDepth, + "rw_coupling_avail" : i_idma_backend.RAWCouplingAvail, + "mask_invalid_data" : i_idma_backend.MaskInvalidData, + "hardware_legalizer" : i_idma_backend.HardwareLegalizer, + "reject_zero_transfers" : i_idma_backend.RejectZeroTransfers, + "error_cap" : i_idma_backend.ErrorCap, + "print_fifo_info" : i_idma_backend.PrintFifoInfo + }; + meta = '{ + "time" : $time() + }; + busy = '{ + "buffer" : i_idma_backend.busy_o.buffer_busy, + "r_dp" : i_idma_backend.busy_o.r_dp_busy, + "w_dp" : i_idma_backend.busy_o.w_dp_busy, + "r_leg" : i_idma_backend.busy_o.r_leg_busy, + "w_leg" : i_idma_backend.busy_o.w_leg_busy, + "eh_fsm" : 
i_idma_backend.busy_o.eh_fsm_busy, + "eh_cnt" : i_idma_backend.busy_o.eh_cnt_busy, + "raw_coupler" : i_idma_backend.busy_o.raw_coupler_busy + }; + axib = '{ + "w_valid" : i_idma_backend.axi_req_o.w_valid, + "w_ready" : axi_be_mst_rsp.w_ready, + "w_strb" : i_idma_backend.axi_req_o.w.strb, + "r_valid" : axi_be_mst_rsp.r_valid, + "r_ready" : i_idma_backend.axi_req_o.r_ready + }; + if ($isunknown(axib["w_ready"]) || $isunknown(axib["r_valid"])) begin + $fatal(1, "UNKNOWN AXI STATE, THIS SHOULD NEVER HAPPEN!"); + end + /* Assembly */ + `IDMA_TRACER_STR_ASSEMBLY(cnst, first_iter); + `IDMA_TRACER_STR_ASSEMBLY(meta, 1); + `IDMA_TRACER_STR_ASSEMBLY(busy, 1); + `IDMA_TRACER_STR_ASSEMBLY(axib, 1); + `IDMA_TRACER_CLEAR_COND(first_iter); + /* Commit */ + $fwrite(tf, $sformatf("%s}\n", trace)); + end + end +`endif +`endif + // pragma translate_on + axi_mux #( .SlvAxiIDWidth(AxiIdWidth - 1), .slv_aw_chan_t(dma_axi_mst_post_mux_aw_chan_t), @@ -186,28 +290,13 @@ module dma_desc_wrap #( ) i_axi_to_reg ( .clk_i (clk_i), .rst_ni (rst_ni), - .testmode_i(1'b0), + .testmode_i(testmode_i), .axi_req_i (axi_slv_req), .axi_rsp_o (axi_slv_rsp), .reg_req_o (dma_reg_slv_req), .reg_rsp_i (dma_reg_slv_rsp) ); - dma_reg_to_axi #( - .axi_req_t (dma_axi_mst_post_mux_req_t), - .axi_rsp_t (dma_axi_mst_post_mux_resp_t), - .reg_req_t (dma_reg_req_t), - .reg_rsp_t (dma_reg_rsp_t), - .ByteWidthInPowersOfTwo($clog2(AxiDataWidth / 8)) - ) i_dma_reg_to_axi ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .axi_req_o(axi_fe_mst_req), - .axi_rsp_i(axi_fe_mst_rsp), - .reg_req_i(dma_reg_mst_req), - .reg_rsp_o(dma_reg_mst_rsp) - ); - assign axi_slv_req = axi_slave_req_i; assign axi_slave_rsp_o = axi_slv_rsp; diff --git a/test/frontends/tb_idma_desc64_bench.sv b/test/frontends/tb_idma_desc64_bench.sv new file mode 100644 index 00000000..baf226d1 --- /dev/null +++ b/test/frontends/tb_idma_desc64_bench.sv @@ -0,0 +1,910 @@ +// Copyright 2022 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Axel Vanoni + +`include "register_interface/typedef.svh" +`include "register_interface/assign.svh" +`include "idma/tracer.svh" +`include "idma/typedef.svh" +`include "axi/typedef.svh" +`include "axi/assign.svh" + +import idma_desc64_reg_pkg::IDMA_DESC64_DESC_ADDR_OFFSET; +import idma_desc64_reg_pkg::IDMA_DESC64_STATUS_OFFSET; +import rand_verif_pkg::rand_wait; +import axi_pkg::*; +import reg_test::reg_driver; + +module tb_idma_desc64_bench #( + parameter integer NumberOfTests = 100, + parameter integer SimulationTimeoutCycles = 100000, + parameter integer ChainedDescriptors = 10, + parameter integer TransferLength = 1024, + parameter integer AlignmentMask = 'h0f, + parameter integer NumContiguous = 200000, + parameter integer MaxAxInFlight = 64, + parameter bit DoIRQ = 1, + parameter integer TransfersToSkip = 4, + // from frontend + parameter int unsigned InputFifoDepth = 8, + parameter int unsigned PendingFifoDepth = 8, + parameter int unsigned NSpeculation = 4, + // from backend tb + parameter int unsigned BufferDepth = 3, + parameter int unsigned NumAxInFlight = NSpeculation > 3 ? 
NSpeculation : 3, + parameter int unsigned TFLenWidth = 32, + parameter int unsigned MemSysDepth = 0, + parameter int unsigned MemNumReqOutst = 1, + parameter int unsigned MemLatency = 0, + parameter int unsigned WatchDogNumCycles = 100, + parameter bit MaskInvalidData = 1, + parameter bit RAWCouplingAvail = 1, + parameter bit HardwareLegalizer = 1, + parameter bit RejectZeroTransfers = 1, + parameter bit ErrorHandling = 1, + parameter bit IdealMemory = 1 +) (); + localparam time PERIOD = 10ns; + localparam time APPL_DELAY = PERIOD / 4; + localparam time ACQ_DELAY = PERIOD * 3 / 4; + + localparam integer RESET_CYCLES = 10; + + localparam integer DataWidth = 64; + localparam integer AddrWidth = 64; + localparam integer UserWidth = 1; + localparam integer AxiIdWidth = 3; + + typedef logic [63:0] addr_t; + typedef logic [ 2:0] axi_id_t; + typedef logic [ 3:0] mem_axi_id_t; + typedef axi_test::axi_ax_beat #(.AW(64), .IW(3), .UW(1)) ax_beat_t; + typedef axi_test::axi_r_beat #(.DW(64), .IW(3), .UW(1)) r_beat_t; + typedef axi_test::axi_w_beat #(.DW(64), .UW(1)) w_beat_t; + typedef axi_test::axi_b_beat #(.IW(3), .UW(1)) b_beat_t; + + `REG_BUS_TYPEDEF_ALL(reg, /* addr */ addr_t, /* data */ logic [63:0], /* strobe */ logic [7:0]) + `AXI_TYPEDEF_ALL(axi, /* addr */ addr_t, /* id */ axi_id_t, /* data */ logic [63:0], /* strb */ logic [7:0], /* user */ logic [0:0]) + `AXI_TYPEDEF_ALL(mem_axi, /* addr */ addr_t, /* id */ mem_axi_id_t, /* data */ logic [63:0], /* strb */ logic [7:0], /* user */ logic [0:0]) + + // iDMA struct definitions + typedef logic [TFLenWidth-1:0] tf_len_t; + + // iDMA request / response types + `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, axi_id_t, addr_t, tf_len_t) + `IDMA_TYPEDEF_FULL_RSP_T(idma_rsp_t, addr_t) + + class stimulus_t; + rand addr_t base; + rand idma_req_t burst; + rand logic do_irq; + addr_t next = 64'hffff_ffff_ffff_ffff; + + // an entire descriptor of 4 words must fit before the end of memory + constraint descriptor_fits_in_memory { 
(64'hffff_ffff_ffff_ffff - base) > 64'd32; } + constraint descriptor_is_in_descriptor_area { base > 64'h0000_ffff_ffff_ffff; } + constraint descriptor_is_aligned { (base & 64'hf) == 0; } + constraint no_empty_transfers { burst.length > '0; } + constraint src_fits_in_memory { 64'hffff_ffff_ffff_ffff - burst.src_addr > burst.length; } + constraint dst_fits_in_memory { 64'hffff_ffff_ffff_ffff - burst.dst_addr > burst.length; } + constraint src_is_not_in_descriptor_area { 64'h0000_ffff_ffff_ffff > (burst.src_addr + burst.length); } + constraint dst_is_not_in_descriptor_area { 64'h0000_ffff_ffff_ffff > (burst.dst_addr + burst.length); } + constraint src_aligned { (burst.src_addr & AlignmentMask) == 64'b0; } + constraint dst_aligned { (burst.dst_addr & AlignmentMask) == 64'b0; } + constraint src_burst_valid { burst.opt.src.burst inside { BURST_INCR }; } + constraint dst_burst_valid { burst.opt.dst.burst inside { BURST_INCR }; } + constraint reduce_len_equal { burst.opt.beo.src_reduce_len == burst.opt.beo.dst_reduce_len; } + constraint reduce_len_zero { burst.opt.beo.src_reduce_len == 1'b0; } + constraint beo_zero { burst.opt.beo.decouple_aw == '0 && burst.opt.beo.src_max_llen == '0 && burst.opt.beo.dst_max_llen == '0 && burst.opt.last == '0 && burst.opt.beo.decouple_rw == '0; } + constraint axi_params_zero_src { burst.opt.src.lock == '0 && burst.opt.src.prot == '0 && burst.opt.src.qos == '0 && burst.opt.src.region == '0; } + constraint axi_params_zero_dst { burst.opt.dst.lock == '0 && burst.opt.dst.prot == '0 && burst.opt.dst.qos == '0 && burst.opt.dst.region == '0; } + constraint axi_src_cache_zero { burst.opt.src.cache == '0; } + constraint axi_dst_cache_zero { burst.opt.dst.cache == '0; } + constraint transfer_length { burst.length == TransferLength; } + constraint irq { do_irq == DoIRQ; } + endclass + + typedef struct { + idma_req_t burst; + addr_t read_address; + logic [7:0] read_length; + logic [2:0] read_size; + addr_t write_address; + logic [7:0] write_length; + 
logic [2:0] write_size; + logic [63:0] write_data; + logic did_irq; + } result_t; + result_t golden_queue[$]; + + // clocks + logic clk; + logic rst_n; + + clk_rst_gen #( + .ClkPeriod(PERIOD), + .RstClkCycles(RESET_CYCLES) + ) i_clock_reset_generator ( + .clk_o (clk) , + .rst_no(rst_n) + ); + + // dut signals and module + REG_BUS #( + .ADDR_WIDTH(64), + .DATA_WIDTH(64) + ) i_reg_iface_bus (clk); + + reg_driver #( + .AW(64), + .DW(64), + .TA(APPL_DELAY), + .TT(ACQ_DELAY) + ) i_reg_iface_driver = new (i_reg_iface_bus); + + axi_resp_t dma_fe_master_response; + axi_req_t dma_fe_master_request; + axi_resp_t dma_be_cut_resp; + axi_req_t dma_be_cut_req; + axi_resp_t dma_be_master_response; + axi_req_t dma_be_master_request; + mem_axi_resp_t axi_mem_response; + mem_axi_req_t axi_mem_request; + mem_axi_resp_t axi_throttle_rsp; + mem_axi_req_t axi_throttle_req; + mem_axi_resp_t axi_multicut_rsp; + mem_axi_req_t axi_multicut_req; + + AXI_BUS_DV #( + .AXI_ADDR_WIDTH(64), + .AXI_DATA_WIDTH(64), + .AXI_ID_WIDTH(3), + .AXI_USER_WIDTH(1) + ) i_axi_be_bus (clk); + + AXI_BUS_DV #( + .AXI_ADDR_WIDTH(64), + .AXI_DATA_WIDTH(64), + .AXI_ID_WIDTH(3), + .AXI_USER_WIDTH(1) + ) i_axi_iface_bus (clk); + + axi_test::axi_driver #( + .AW(64), + .DW(64), + .IW(3), + .UW(1), + .TA(APPL_DELAY), + .TT(ACQ_DELAY) + ) i_axi_iface_driver = new (i_axi_iface_bus); + + reg_rsp_t dma_slave_response; + reg_req_t dma_slave_request; + + idma_pkg::idma_busy_t busy; + idma_req_t dma_be_req; + idma_rsp_t dma_be_rsp; + + logic dma_be_req_valid; + logic dma_be_req_ready; + logic dma_be_rsp_valid; + logic dma_be_rsp_ready; + logic irq; + + idma_desc64_top #( + .AddrWidth (64), + .DataWidth (64), + .AxiIdWidth (3), + .idma_req_t (idma_req_t), + .idma_rsp_t (idma_rsp_t), + .axi_rsp_t (axi_resp_t), + .axi_req_t (axi_req_t), + .axi_ar_chan_t (axi_ar_chan_t), + .axi_r_chan_t (axi_r_chan_t), + .reg_rsp_t (reg_rsp_t), + .reg_req_t (reg_req_t), + .InputFifoDepth (InputFifoDepth), + .PendingFifoDepth(PendingFifoDepth), + 
.BackendDepth (NumAxInFlight + BufferDepth), + .NSpeculation (NSpeculation) + ) i_dut ( + .clk_i (clk), + .rst_ni (rst_n), + .master_req_o (dma_fe_master_request), + .master_rsp_i (dma_fe_master_response), + .axi_ar_id_i (3'b111), + .axi_aw_id_i (3'b111), + .slave_req_i (dma_slave_request), + .slave_rsp_o (dma_slave_response), + .idma_req_o (dma_be_req), + .idma_req_valid_o(dma_be_req_valid), + .idma_req_ready_i(dma_be_req_ready), + .idma_rsp_i ('0), + .idma_rsp_valid_i(dma_be_rsp_valid), + .idma_rsp_ready_o(dma_be_rsp_ready), + .idma_busy_i (|busy), + .irq_o (irq) + ); + + idma_backend #( + .DataWidth ( DataWidth ), + .AddrWidth ( AddrWidth ), + .AxiIdWidth ( AxiIdWidth ), + .UserWidth ( UserWidth ), + .TFLenWidth ( TFLenWidth ), + .MaskInvalidData ( MaskInvalidData ), + .BufferDepth ( BufferDepth ), + .RAWCouplingAvail ( RAWCouplingAvail ), + .HardwareLegalizer ( HardwareLegalizer ), + .RejectZeroTransfers ( RejectZeroTransfers ), + .NumAxInFlight ( NumAxInFlight ), + .MemSysDepth ( MemSysDepth ), + .idma_req_t ( idma_req_t ), + .idma_rsp_t ( idma_rsp_t ), + .idma_eh_req_t ( idma_pkg::idma_eh_req_t ), + .idma_busy_t ( idma_pkg::idma_busy_t ), + .axi_req_t ( axi_req_t ), + .axi_rsp_t ( axi_resp_t ) + ) i_idma_backend ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .testmode_i ( 1'b0 ), + .idma_req_i ( dma_be_req ), + .req_valid_i ( dma_be_req_valid ), + .req_ready_o ( dma_be_req_ready ), + .idma_rsp_o ( dma_be_rsp ), + .rsp_valid_o ( dma_be_rsp_valid ), + .rsp_ready_i ( dma_be_rsp_ready ), + .idma_eh_req_i ( '0 ), + .eh_req_valid_i ( '1 ), + .eh_req_ready_o ( /* unconnected */), + .axi_req_o ( dma_be_master_request ), + .axi_rsp_i ( dma_be_master_response ), + .busy_o ( busy ) + ); + + string trace_file; + initial begin + void'($value$plusargs("trace_file=%s", trace_file)); + end + `ifndef SYNTHESYS + `ifndef VERILATOR + initial begin : inital_tracer + automatic bit first_iter = 1'b1; + automatic int unsigned skipped_transfers = 0; + automatic int unsigned 
recorded_transfers = 0; + automatic integer tf; + automatic `IDMA_TRACER_MAX_TYPE cnst [string]; + automatic `IDMA_TRACER_MAX_TYPE meta [string]; + automatic `IDMA_TRACER_MAX_TYPE busy [string]; + automatic `IDMA_TRACER_MAX_TYPE axib [string]; + automatic string trace; + #0; + tf = $fopen(trace_file, "w"); + $display("[Tracer] Logging iDMA backend %s to %s", "i_idma_backend", trace_file); + forever begin + @(posedge i_idma_backend.clk_i); + if (i_idma_backend.rst_ni & irq) begin + skipped_transfers += 1; + if (skipped_transfers > TransfersToSkip) begin + break; + end + end + end + forever begin + @(posedge i_idma_backend.clk_i); + if (irq) begin + recorded_transfers += 1; + if (recorded_transfers >= TransfersToSkip / 2) begin + break; + end + end + /* Trace */ + trace = "{"; + /* Constants */ + cnst = '{ + "inst" : "i_idma_backend", + "data_width" : i_idma_backend.DataWidth, + "addr_width" : i_idma_backend.AddrWidth, + "user_width" : i_idma_backend.UserWidth, + "axi_id_width" : i_idma_backend.AxiIdWidth, + "num_ax_in_flight" : i_idma_backend.NumAxInFlight, + "buffer_depth" : i_idma_backend.BufferDepth, + "tf_len_width" : i_idma_backend.TFLenWidth, + "mem_sys_depth" : i_idma_backend.MemSysDepth, + "rw_coupling_avail" : i_idma_backend.RAWCouplingAvail, + "mask_invalid_data" : i_idma_backend.MaskInvalidData, + "hardware_legalizer" : i_idma_backend.HardwareLegalizer, + "reject_zero_transfers" : i_idma_backend.RejectZeroTransfers, + "error_cap" : i_idma_backend.ErrorCap, + "print_fifo_info" : i_idma_backend.PrintFifoInfo + }; + meta = '{ + "time" : $time() + }; + busy = '{ + "buffer" : i_idma_backend.busy_o.buffer_busy, + "r_dp" : i_idma_backend.busy_o.r_dp_busy, + "w_dp" : i_idma_backend.busy_o.w_dp_busy, + "r_leg" : i_idma_backend.busy_o.r_leg_busy, + "w_leg" : i_idma_backend.busy_o.w_leg_busy, + "eh_fsm" : i_idma_backend.busy_o.eh_fsm_busy, + "eh_cnt" : i_idma_backend.busy_o.eh_cnt_busy, + "raw_coupler" : i_idma_backend.busy_o.raw_coupler_busy + }; + axib = '{ + 
"w_valid" : i_idma_backend.axi_req_o.w_valid, + "w_ready" : dma_be_master_response.w_ready, + "w_strb" : i_idma_backend.axi_req_o.w.strb, + "r_valid" : dma_be_master_response.r_valid, + "r_ready" : i_idma_backend.axi_req_o.r_ready + }; + if ($isunknown(axib["w_ready"]) || $isunknown(axib["r_valid"])) begin + $fatal("UNKNOWN AXI STATE, THIS SHOULD NEVER HAPPEN!"); + end + /* Assembly */ + `IDMA_TRACER_STR_ASSEMBLY(cnst, first_iter); + `IDMA_TRACER_STR_ASSEMBLY(meta, 1); + `IDMA_TRACER_STR_ASSEMBLY(busy, 1); + `IDMA_TRACER_STR_ASSEMBLY(axib, 1); + `IDMA_TRACER_CLEAR_COND(first_iter); + /* Commit */ + $fwrite(tf, $sformatf("%s}\n", trace)); + end + end +`endif +`endif + + /* + axi_cut #( + .aw_chan_t (axi_aw_chan_t), + .w_chan_t (axi_w_chan_t), + .b_chan_t (axi_b_chan_t), + .ar_chan_t (axi_ar_chan_t), + .r_chan_t (axi_r_chan_t), + .axi_req_t (axi_req_t), + .axi_resp_t(axi_resp_t) + ) i_axi_cut ( + .clk_i (clk), + .rst_ni (rst_n), + .slv_req_i (dma_be_cut_req), + .slv_resp_o (dma_be_cut_resp), + .mst_req_o (dma_be_master_request), + .mst_resp_i (dma_be_master_response) + ); + */ + + // AXI mux + axi_mux #( + .SlvAxiIDWidth (3), + .slv_aw_chan_t (axi_aw_chan_t), + .mst_aw_chan_t (mem_axi_aw_chan_t), + .w_chan_t (axi_w_chan_t), + .slv_b_chan_t (axi_b_chan_t), + .mst_b_chan_t (mem_axi_b_chan_t), + .slv_ar_chan_t (axi_ar_chan_t), + .mst_ar_chan_t (mem_axi_ar_chan_t), + .slv_r_chan_t (axi_r_chan_t), + .mst_r_chan_t (mem_axi_r_chan_t), + .slv_req_t (axi_req_t), + .slv_resp_t (axi_resp_t), + .mst_req_t (mem_axi_req_t), + .mst_resp_t (mem_axi_resp_t), + .NoSlvPorts (2), + .MaxWTrans (MaxAxInFlight), + .FallThrough (1'b0), + .SpillAw (1'b0), + .SpillW (1'b0), + .SpillB (1'b0), + .SpillAr (1'b0), + .SpillR (1'b0) + ) i_mux ( + .clk_i (clk), + .rst_ni (rst_n), + .test_i (1'b0), + .slv_reqs_i ({dma_be_master_request, dma_fe_master_request}), + .slv_resps_o({dma_be_master_response, dma_fe_master_response}), + .mst_req_o (axi_throttle_req), + .mst_resp_i (axi_throttle_rsp) + ); + + 
// sim memory + idma_sim_mem #( + .AddrWidth ( AddrWidth ), + .DataWidth ( DataWidth ), + .IdWidth (AxiIdWidth + 1), + .UserWidth ( UserWidth ), + .req_t ( mem_axi_req_t), + .rsp_t (mem_axi_resp_t), + .WarnUninitialized ( 1'b0 ), + .ClearErrOnAccess ( 1'b1 ), + .ApplDelay ( APPL_DELAY ), + .AcqDelay ( ACQ_DELAY ) + ) i_idma_sim_mem ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .axi_req_i ( axi_mem_request ), + .axi_rsp_o ( axi_mem_response ) + ); + + // allow up to MaxAxInFlight ARs and MaxAxInFlight AWs in-flight + axi_throttle #( + .MaxNumAwPending(MaxAxInFlight), + .MaxNumArPending(MaxAxInFlight), + .axi_req_t(mem_axi_req_t), + .axi_rsp_t(mem_axi_resp_t) + ) i_axi_throttle ( + .clk_i (clk), + .rst_ni(rst_n), + .req_i(axi_throttle_req), + .rsp_o(axi_throttle_rsp), + .req_o(axi_multicut_req), + .rsp_i(axi_multicut_rsp), + .w_credit_i (MaxAxInFlight), + .r_credit_i (MaxAxInFlight) + ); + + // delay the signals using AXI4 multicuts + axi_multicut #( + .NoCuts ( MemLatency ), + .aw_chan_t ( mem_axi_aw_chan_t ), + .w_chan_t ( mem_axi_w_chan_t ), + .b_chan_t ( mem_axi_b_chan_t ), + .ar_chan_t ( mem_axi_ar_chan_t ), + .r_chan_t ( mem_axi_r_chan_t ), + .axi_req_t ( mem_axi_req_t ), + .axi_resp_t ( mem_axi_resp_t ) + ) i_axi_multicut ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .slv_req_i ( axi_multicut_req ), + .slv_resp_o ( axi_multicut_rsp ), + .mst_req_o ( axi_mem_request ), + .mst_resp_i ( axi_mem_response ) + ); + + `REG_BUS_ASSIGN_TO_REQ(dma_slave_request, i_reg_iface_bus); + `REG_BUS_ASSIGN_FROM_RSP(i_reg_iface_bus, dma_slave_response); + + `AXI_ASSIGN_FROM_REQ(i_axi_iface_bus, dma_fe_master_request); + `AXI_ASSIGN_FROM_RESP(i_axi_iface_bus, dma_fe_master_response); + + `AXI_ASSIGN_FROM_REQ(i_axi_be_bus, dma_be_master_request); + `AXI_ASSIGN_FROM_RESP(i_axi_be_bus, dma_be_master_response); + + initial begin + i_axi_iface_driver.reset_slave(); + end + + // queues for communication and data transfer + stimulus_t generated_stimuli[$][$]; + result_t ar_seen_result[$]; + result_t
inflight_results_after_reads[$]; + result_t inflight_results_submitted_to_be[$]; + result_t aw_seen_result[$]; + result_t w_seen_result[$]; + result_t result_queue[$]; + + function automatic void generate_stimuli(); + automatic addr_t base_current = 64'h0001_0000_0000_0000; + automatic int contiguous = 0; + repeat (NumberOfTests) begin + automatic stimulus_t current_stimulus; + automatic stimulus_t current_stimuli_group[$]; + automatic int number_of_descriptors_in_test; + + number_of_descriptors_in_test = ChainedDescriptors; + + current_stimulus = new(); + if (!current_stimulus.randomize()) begin + $error("Couldn't randomize stimulus"); + end else begin + current_stimulus.base = base_current; + current_stimuli_group.push_back(current_stimulus); + contiguous += 1; + golden_queue.push_back('{ + burst: current_stimulus.burst, + + read_address: current_stimulus.base, + // axi length 3 is 4 transfers (+1) + read_length: 'd3, + // 2^3 = 8 bytes in a transfer + read_size: 'b011, + + write_address: current_stimulus.base, + // axi length 0 is 1 transfer (+1) + write_length: 8'b0, + // 2^3 = 8 bytes in a transfer + write_size: 3'b011, + write_data: 64'hffff_ffff_ffff_ffff, + + did_irq: current_stimulus.do_irq + }); + if (contiguous != NumContiguous) begin + base_current += 'd32; + end else begin + // make sure all invalid prefetches grab Xs from memory + base_current += 'h1000; + contiguous = '0; + end + end + + repeat (number_of_descriptors_in_test - 1) begin + current_stimulus = new(); + if (!current_stimulus.randomize()) begin + $error("Couldn't randomize stimulus"); + end else begin + current_stimulus.base = base_current; + contiguous += 1; + + // chain descriptor + current_stimuli_group[$].next = current_stimulus.base; + + current_stimuli_group.push_back(current_stimulus); + + golden_queue.push_back('{ + burst: current_stimulus.burst, + + read_address: current_stimulus.base, + // axi length 3 is 4 transfers (+1) + read_length: 'd3, + // 2^3 = 8 bytes in a transfer + 
read_size: 'b011, + + write_address: current_stimulus.base, + // axi length 0 is 1 transfer (+1) + write_length: 8'b0, + // 2^3 = 8 bytes in a transfer + write_size: 3'b011, + write_data: 64'hffff_ffff_ffff_ffff, + + did_irq: current_stimulus.do_irq + }); + end + if (contiguous != NumContiguous) begin + base_current += 'd32; + end else begin + // make sure all invalid prefetches grab Xs from memory + base_current += 'h1000; + contiguous = '0; + end + end + generated_stimuli.push_back(current_stimuli_group); + end + // make the last stimulus generate an irq to simplify the IRQ + // acquisition + // NOTE: with few requests this might impact statistics of the no-IRQ + // case + generated_stimuli[$][$].do_irq = 1'b1; + golden_queue[$].did_irq = 1'b1; + endfunction : generate_stimuli + + function automatic void write_mem_64(addr_t base, logic[63:0] data); + i_idma_sim_mem.mem[base] = data[ 7: 0]; + i_idma_sim_mem.mem[base + 1] = data[15: 8]; + i_idma_sim_mem.mem[base + 2] = data[23:16]; + i_idma_sim_mem.mem[base + 3] = data[31:24]; + i_idma_sim_mem.mem[base + 4] = data[39:32]; + i_idma_sim_mem.mem[base + 5] = data[47:40]; + i_idma_sim_mem.mem[base + 6] = data[55:48]; + i_idma_sim_mem.mem[base + 7] = data[63:56]; + endfunction : write_mem_64 + + function automatic void load_descriptors_into_memory(); + $display("Loading descriptors"); + foreach (generated_stimuli[i]) begin + foreach (generated_stimuli[i][j]) begin + automatic addr_t base = generated_stimuli[i][j].base; + write_mem_64(base, stimulus_to_flag_bits(generated_stimuli[i][j])); + if (j == (generated_stimuli[i].size() - 1)) begin + write_mem_64(base + 64'h8, 64'hffff_ffff_ffff_ffff); + end else begin + write_mem_64(base + 64'h8, generated_stimuli[i][j+1].base); + end + write_mem_64(base + 64'h10, generated_stimuli[i][j].burst.src_addr); + write_mem_64(base + 64'h18, generated_stimuli[i][j].burst.dst_addr); + end + end + endfunction : load_descriptors_into_memory + + task apply_stimuli(); + fork +
regbus_slave_interaction(); + join + endtask + + task collect_responses(); + fork + axi_master_acquire_ars(); + axi_master_acquire_rs(); + axi_master_acquire_aw(); + axi_master_acquire_w(); + axi_master_acquire_irqs(); + acquire_bursts(); + join + endtask + + // regbus slave interaction (we're acting as master) + task regbus_slave_interaction(); + automatic stimulus_t current_stimulus_group[$]; + i_reg_iface_driver.reset_master(); + @(posedge rst_n); + + forever begin + automatic logic [63:0] status; + automatic addr_t start_addr; + automatic logic error; + + wait (generated_stimuli.size() > '0); + current_stimulus_group = generated_stimuli.pop_front(); + + i_reg_iface_driver.send_write( + .addr (IDMA_DESC64_DESC_ADDR_OFFSET) , + .data (current_stimulus_group[0].base), + .strb (8'hff) , + .error(error) + ); + end + endtask + + function automatic logic [63:0] stimulus_to_flag_bits(stimulus_t stim); + // Copied from frontend: + // bit 0 set to trigger an irq on completion, unset to not be notified + // bits 2:1 burst type for source, fixed: 00, incr: 01, wrap: 10 + // bits 4:3 burst type for destination, fixed: 00, incr: 01, wrap: 10 + // for a description of these modes, check AXI-Pulp documentation + // bit 5 set to decouple reads and writes in the backend + // bit 6 set to serialize requests. 
Not setting might violate AXI spec + // bit 7 set to deburst (each burst is split into own transfer) + // for a more thorough description, refer to the iDMA backend documentation + // bits 11:8 Bitfield for AXI cache attributes for the source + // bits 15:12 Bitfield for AXI cache attributes for the destination + // bits of the bitfield (refer to AXI-Pulp for a description): + // bit 0: cache bufferable + // bit 1: cache modifiable + // bit 2: cache read alloc + // bit 3: cache write alloc + // bits 23:16 AXI ID used for the transfer + // bits 31:26 unused/reserved + automatic logic [63:0] result = '0; + automatic logic [31:0] flags = '0; + + flags[0] = stim.do_irq; + flags[2:1] = stim.burst.opt.src.burst; + flags[4:3] = stim.burst.opt.dst.burst; + flags[5] = stim.burst.opt.beo.decouple_rw; + flags[6] = 1'b0; + // flags[6] = stim.burst.opt.beo.serialize; + flags[7] = stim.burst.opt.beo.src_reduce_len; + flags[11:8] = stim.burst.opt.src.cache; + flags[15:12] = stim.burst.opt.dst.cache; + flags[23:16] = stim.burst.opt.axi_id; + flags[31:26] = '0; + + result[31:0] = stim.burst.length; + result[63:32] = flags; + return result; + endfunction + + task axi_master_acquire_ars(); + @(posedge rst_n); + forever begin + automatic ax_beat_t ar_beat; + automatic result_t current_result; + // monitor ar + i_axi_iface_driver.mon_ar(ar_beat); + // and record contents + current_result.read_address = ar_beat.ax_addr; + current_result.read_length = ar_beat.ax_len; + current_result.read_size = ar_beat.ax_size; + ar_seen_result.push_back(current_result); + end + endtask : axi_master_acquire_ars + + task axi_master_acquire_rs(); + @(posedge rst_n); + forever begin + automatic r_beat_t r_beat; + automatic result_t current_result; + wait (ar_seen_result.size() > 0); + current_result = ar_seen_result.pop_front(); + i_axi_iface_driver.mon_r(r_beat); + if ($isunknown(r_beat.r_data)) begin + // drop current result + // as it is a prefetched one + end else begin + 
inflight_results_after_reads.push_back(current_result); + end + // four reads per descriptor in the 64-bit case + i_axi_iface_driver.mon_r(r_beat); + i_axi_iface_driver.mon_r(r_beat); + i_axi_iface_driver.mon_r(r_beat); + if (!r_beat.r_last) begin + $error("R acquisition has come out-of-sync."); + end + end + endtask : axi_master_acquire_rs + + task axi_master_acquire_aw(); + // set to one to skip first submission of what would be an invalid result + automatic result_t current_result; + @(posedge rst_n); + forever begin + automatic ax_beat_t aw_beat; + i_axi_iface_driver.mon_aw(aw_beat); + + wait (inflight_results_submitted_to_be.size() > 0); + current_result = inflight_results_submitted_to_be.pop_front(); + current_result.write_address = aw_beat.ax_addr; + current_result.write_length = aw_beat.ax_len; + current_result.write_size = aw_beat.ax_size; + aw_seen_result.push_back(current_result); + end + endtask + + task axi_master_acquire_w(); + automatic result_t current_result; + @(posedge rst_n); + forever begin + automatic w_beat_t w_beat; + i_axi_iface_driver.mon_w(w_beat); + wait (aw_seen_result.size() > 0); + current_result = aw_seen_result.pop_front(); + current_result.write_data = w_beat.w_data; + w_seen_result.push_back(current_result); + end + endtask : axi_master_acquire_w + + task axi_master_acquire_irqs(); + automatic result_t current_result; + @(posedge rst_n); + forever begin + automatic b_beat_t b_beat; + automatic result_t current_result; + + // HACK: I'm taking advantage of the knowledge that the irq and + // B happen in the same cycle + i_axi_iface_driver.mon_b(b_beat); + wait(w_seen_result.size() > 0); + current_result = w_seen_result.pop_front(); + current_result.did_irq = irq; + result_queue.push_back(current_result); + end + endtask : axi_master_acquire_irqs + + task acquire_bursts(); + automatic result_t current_result; + automatic idma_req_t current_burst; + @(posedge rst_n); + forever begin + forever begin + @(posedge clk); + #(ACQ_DELAY); + 
if (dma_be_req_valid && dma_be_req_ready) break; + end + current_burst = dma_be_req; + wait (inflight_results_after_reads.size() > 0); + current_result = inflight_results_after_reads.pop_front(); + current_result.burst = current_burst; + inflight_results_submitted_to_be.push_back(current_result); + end + endtask + + // score the results + initial begin : proc_scoring + static logic finished_simulation = 1'b0; + + static int number_of_descriptors = 0; + static int read_addr_errors = 0; + static int read_length_errors = 0; + static int read_size_errors = 0; + static int write_addr_errors = 0; + static int write_length_errors = 0; + static int write_data_errors = 0; + static int write_size_errors = 0; + static int burst_errors = 0; + static int irq_errors = 0; + + generate_stimuli(); + load_descriptors_into_memory(); + + fork + apply_stimuli(); + collect_responses(); + begin : watchdog + @(posedge rst_n); + repeat (SimulationTimeoutCycles) begin + @(posedge clk); + end + end : watchdog + begin : scorer + @(posedge rst_n); + + while (golden_queue.size() > '0) begin + automatic result_t golden; + automatic result_t actual; + wait (result_queue.size() > 0); + golden = golden_queue.pop_front(); + actual = result_queue.pop_front(); + if (golden.burst !== actual.burst) begin + $error("Burst mismatch @ %d:\ngolden: %p\nactual: %p", + number_of_descriptors, golden.burst, actual.burst); + ++burst_errors; + end + if (golden.read_address !== actual.read_address) begin + $error("Read address mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.read_address, actual.read_address); + ++read_addr_errors; + end + if (golden.read_length !== actual.read_length) begin + $error("Read length mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.read_length, actual.read_length); + ++read_length_errors; + end + if (golden.read_size !== actual.read_size) begin + $error("Read size mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, 
golden.read_size, actual.read_size); + ++read_size_errors; + end + if (golden.write_address !== actual.write_address) begin + $error("Write address mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.write_address, actual.write_address); + ++write_addr_errors; + end + if (golden.write_length !== actual.write_length) begin + $error("Write length mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.write_length, actual.write_length); + ++write_length_errors; + end + if (golden.write_size !== actual.write_size) begin + $error("Write size mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.write_size, actual.write_size); + ++write_size_errors; + end + if (golden.write_data !== actual.write_data) begin + $error("Write data mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.write_data, actual.write_data); + ++write_data_errors; + end + if (golden.did_irq !== actual.did_irq) begin + $error("IRQ mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.did_irq, actual.did_irq); + ++irq_errors; + end + ++number_of_descriptors; + end + // wait for frontend to signal no longer busy + forever begin + automatic logic [63:0] status; + automatic logic error; + i_reg_iface_driver.send_read( + .addr(IDMA_DESC64_STATUS_OFFSET), + .data(status), + .error(error) + ); + if (status[0] != 1'b1) break; + end + finished_simulation = 1'b1; + end : scorer + join_any + disable fork; + if (!finished_simulation) begin + $error("Simulation timed out."); + end else begin + $display("Simulation finished in a timely manner."); + end + $display("Saw %d descriptors." 
, number_of_descriptors); + $display("Read address errors: %d", read_addr_errors); + $display("Read length errors: %d", read_length_errors); + $display("Read size errors: %d", read_size_errors); + $display("Write address errors: %d", write_addr_errors); + $display("Write length errors: %d", write_length_errors); + $display("Write size errors: %d", write_size_errors); + $display("Write data errors: %d", write_data_errors); + $display("Burst errors: %d", burst_errors); + $display("IRQ errors: %d", irq_errors); + $finish(); + end : proc_scoring +endmodule : tb_idma_desc64_bench diff --git a/test/frontends/tb_idma_desc64_top.sv b/test/frontends/tb_idma_desc64_top.sv index c4714ef1..e55e1062 100644 --- a/test/frontends/tb_idma_desc64_top.sv +++ b/test/frontends/tb_idma_desc64_top.sv @@ -7,6 +7,8 @@ `include "register_interface/typedef.svh" `include "register_interface/assign.svh" `include "idma/typedef.svh" +`include "axi/typedef.svh" +`include "axi/assign.svh" import idma_desc64_reg_pkg::IDMA_DESC64_DESC_ADDR_OFFSET; import idma_desc64_reg_pkg::IDMA_DESC64_STATUS_OFFSET; @@ -15,11 +17,15 @@ import axi_pkg::*; import reg_test::reg_driver; module tb_idma_desc64_top #( - parameter integer NumberOfTests = 100, - parameter integer SimulationTimeoutCycles = 100000, - parameter integer MaxChainedDescriptors = 10, - parameter integer MinChainedDescriptors = 1 - + parameter int unsigned NumberOfTests = 100, + parameter int unsigned SimulationTimeoutCycles = 100000, + parameter int signed ChainedDescriptors = -1, + parameter int unsigned MaxChainedDescriptors = 10, + parameter int unsigned MinChainedDescriptors = 1, + parameter int unsigned InputFifoDepth = 8, + parameter int unsigned PendingFifoDepth = 8, + parameter int unsigned BackendDepth = 5, + parameter int unsigned MaxAWWPending = 8 ) (); localparam time PERIOD = 10ns; localparam time APPL_DELAY = PERIOD / 4; @@ -27,10 +33,15 @@ module tb_idma_desc64_top #( localparam integer RESET_CYCLES = 10; - 
`REG_BUS_TYPEDEF_ALL(reg, /* addr */ logic [63:0], /* data */ logic [63:0], /* strobe */ logic [7:0]) - typedef logic [63:0] addr_t; typedef logic [ 2:0] axi_id_t; + typedef axi_test::axi_ax_beat #(.AW(64), .IW(3), .UW(1)) ax_beat_t; + typedef axi_test::axi_r_beat #(.DW(64), .IW(3), .UW(1)) r_beat_t; + typedef axi_test::axi_w_beat #(.DW(64), .UW(1)) w_beat_t; + typedef axi_test::axi_b_beat #(.IW(3), .UW(1)) b_beat_t; + + `REG_BUS_TYPEDEF_ALL(reg, /* addr */ addr_t, /* data */ logic [63:0], /* strobe */ logic [7:0]) + `AXI_TYPEDEF_ALL(axi, /* addr */ addr_t, /* id */ axi_id_t, /* data */ logic [63:0], /* strb */ logic [7:0], /* user */ logic [0:0]) // iDMA struct definitions localparam int unsigned TFLenWidth = 32; @@ -44,31 +55,35 @@ module tb_idma_desc64_top #( rand addr_t base; rand idma_req_t burst; rand logic do_irq; - addr_t next = ~64'b0; + addr_t next = 64'hffff_ffff_ffff_ffff; // an entire descriptor of 4 words must fit before the end of memory - constraint descriptor_fits_in_memory { ~64'b0 - base > 32; } - constraint no_empty_transfers { burst.length > 0; } - constraint src_fits_in_memory { ~64'b0 - burst.src_addr > burst.length; } - constraint dst_fits_in_memory { ~64'b0 - burst.dst_addr > burst.length; } + constraint descriptor_fits_in_memory { (64'hffff_ffff_ffff_ffff - base) > 64'd32; } + constraint no_empty_transfers { burst.length > '0; } + constraint src_fits_in_memory { 64'hffff_ffff_ffff_ffff - burst.src_addr > burst.length; } + constraint dst_fits_in_memory { 64'hffff_ffff_ffff_ffff - burst.dst_addr > burst.length; } constraint src_burst_valid { burst.opt.src.burst inside { BURST_INCR, BURST_WRAP, BURST_FIXED }; } constraint dst_burst_valid { burst.opt.dst.burst inside { BURST_INCR, BURST_WRAP, BURST_FIXED }; } constraint reduce_len_equal { burst.opt.beo.src_reduce_len == burst.opt.beo.dst_reduce_len; } + constraint reduce_len_zero { burst.opt.beo.src_reduce_len == 1'b0; } constraint beo_zero { burst.opt.beo.decouple_aw == '0 && 
burst.opt.beo.src_max_llen == '0 && burst.opt.beo.dst_max_llen == '0 && burst.opt.last == '0; } constraint axi_params_zero_src { burst.opt.src.lock == '0 && burst.opt.src.prot == '0 && burst.opt.src.qos == '0 && burst.opt.src.region == '0; } constraint axi_params_zero_dst { burst.opt.dst.lock == '0 && burst.opt.dst.prot == '0 && burst.opt.dst.qos == '0 && burst.opt.dst.region == '0; } endclass typedef struct { - idma_req_t burst; - addr_t read_addresses[4]; + idma_req_t burst; + addr_t read_address; + logic [7:0] read_length; + logic [2:0] read_size; addr_t write_address; + logic [7:0] write_length; + logic [2:0] write_size; logic [63:0] write_data; logic did_irq; } result_t; result_t golden_queue[$]; - // clocks logic clk; logic rst_n; @@ -94,37 +109,70 @@ module tb_idma_desc64_top #( .TT(ACQ_DELAY) ) i_reg_iface_driver = new (i_reg_iface_bus); - reg_rsp_t dma_master_response; - reg_req_t dma_master_request; + axi_resp_t dma_master_response; + axi_req_t dma_master_request; + + AXI_BUS_DV #( + .AXI_ADDR_WIDTH(64), + .AXI_DATA_WIDTH(64), + .AXI_ID_WIDTH(3), + .AXI_USER_WIDTH(1) + ) i_axi_iface_bus (clk); + + axi_test::axi_driver #( + .AW(64), + .DW(64), + .IW(3), + .UW(1), + .TA(APPL_DELAY), + .TT(ACQ_DELAY) + ) i_axi_iface_driver = new (i_axi_iface_bus); + reg_rsp_t dma_slave_response; reg_req_t dma_slave_request; idma_req_t dma_be_req; - logic dma_be_tx_complete; - logic dma_be_idle; - logic dma_be_valid; - logic dma_be_ready; + logic backend_busy; + logic dma_be_req_valid; + logic dma_be_req_ready; + logic dma_be_rsp_valid; + logic dma_be_rsp_ready; logic irq; idma_desc64_top #( - .AddrWidth (64), - .burst_req_t (idma_req_t), - .reg_rsp_t (reg_rsp_t), - .reg_req_t (reg_req_t) + .AddrWidth (64), + .DataWidth (64), + .AxiIdWidth (3), + .idma_req_t (idma_req_t), + .idma_rsp_t (idma_rsp_t), + .axi_rsp_t (axi_resp_t), + .axi_req_t (axi_req_t), + .axi_ar_chan_t (axi_ar_chan_t), + .axi_r_chan_t (axi_r_chan_t), + .reg_rsp_t (reg_rsp_t), + .reg_req_t (reg_req_t), + 
.InputFifoDepth (InputFifoDepth), + .PendingFifoDepth(PendingFifoDepth), + .BackendDepth (BackendDepth), + .MaxAWWPending (MaxAWWPending) ) i_dut ( - .clk_i (clk), - .rst_ni (rst_n), - .master_rsp_i (dma_master_response), - .master_req_o (dma_master_request), - .slave_req_i (dma_slave_request), - .slave_rsp_o (dma_slave_response), - .dma_be_tx_complete_i(dma_be_tx_complete), - .dma_be_idle_i (dma_be_idle), - .dma_be_valid_o (dma_be_valid), - .dma_be_ready_i (dma_be_ready), - .dma_be_req_o (dma_be_req), - .irq_o (irq) + .clk_i (clk), + .rst_ni (rst_n), + .master_req_o (dma_master_request), + .master_rsp_i (dma_master_response), + .axi_ar_id_i (3'b111), + .axi_aw_id_i (3'b111), + .slave_req_i (dma_slave_request), + .slave_rsp_o (dma_slave_response), + .idma_req_o (dma_be_req), + .idma_req_valid_o(dma_be_req_valid), + .idma_req_ready_i(dma_be_req_ready), + .idma_rsp_i ('0), + .idma_rsp_valid_i(dma_be_rsp_valid), + .idma_rsp_ready_o(dma_be_rsp_ready), + .idma_busy_i (backend_busy), + .irq_o (irq) ); assign dma_slave_request.addr = i_reg_iface_bus.addr; @@ -136,20 +184,22 @@ module tb_idma_desc64_top #( assign i_reg_iface_bus.ready = dma_slave_response.ready; assign i_reg_iface_bus.error = dma_slave_response.error; + `AXI_ASSIGN_FROM_REQ(i_axi_iface_bus, dma_master_request); + `AXI_ASSIGN_TO_RESP(dma_master_response, i_axi_iface_bus); + initial begin - dma_master_response = '0; - dma_be_tx_complete = '0; - dma_be_ready = '0; + i_axi_iface_driver.reset_slave(); + dma_be_rsp_valid = 1'b0; + dma_be_req_ready = 1'b0; + backend_busy = 1'b0; end // queues for communication and data transfer stimulus_t generated_stimuli[$][$]; stimulus_t inflight_stimuli[$][$]; - logic inflight_be_tokens[$]; result_t inflight_results_after_reads[$]; result_t inflight_results_submitted_to_be[$]; result_t result_queue[$]; - assign dma_be_idle = inflight_be_tokens.size() == 0; function automatic void generate_stimuli(); repeat (NumberOfTests) begin @@ -157,10 +207,14 @@ module tb_idma_desc64_top 
#( automatic stimulus_t current_stimuli_group[$]; automatic int number_of_descriptors_in_test; - void'(std::randomize(number_of_descriptors_in_test) with { - number_of_descriptors_in_test >= MinChainedDescriptors; - number_of_descriptors_in_test <= MaxChainedDescriptors; - }); + if (ChainedDescriptors < 0) begin + void'(std::randomize(number_of_descriptors_in_test) with { + number_of_descriptors_in_test >= MinChainedDescriptors; + number_of_descriptors_in_test <= MaxChainedDescriptors; + }); + end else begin + number_of_descriptors_in_test = ChainedDescriptors; + end current_stimulus = new(); if (!current_stimulus.randomize()) begin @@ -169,16 +223,21 @@ module tb_idma_desc64_top #( current_stimuli_group.push_back(current_stimulus); golden_queue.push_back('{ - burst: current_stimulus.burst, - read_addresses: '{ - // descriptor is four contiguous 64-bit words - current_stimulus.base, - current_stimulus.base + 8, - current_stimulus.base + 16, - current_stimulus.base + 24 - }, + burst: current_stimulus.burst, + + read_address: current_stimulus.base, + // axi length 3 is 4 transfers (+1) + read_length: 'd3, + // 2^3 = 8 bytes in a transfer + read_size: 'b011, + write_address: current_stimulus.base, - write_data: ~64'b0, + // axi length 0 is 1 transfer (+1) + write_length: 8'b0, + // 2^3 = 8 bytes in a transfer + write_size: 3'b011, + write_data: 64'hffff_ffff_ffff_ffff, + did_irq: current_stimulus.do_irq }); end @@ -194,16 +253,21 @@ module tb_idma_desc64_top #( current_stimuli_group.push_back(current_stimulus); golden_queue.push_back('{ - burst: current_stimulus.burst, - read_addresses: '{ - // descriptor is four contiguous 64-bit words - current_stimulus.base, - current_stimulus.base + 8, - current_stimulus.base + 16, - current_stimulus.base + 24 - }, + burst: current_stimulus.burst, + + read_address: current_stimulus.base, + // axi length 3 is 4 transfers (+1) + read_length: 'd3, + // 2^3 = 8 bytes in a transfer + read_size: 'b011, + write_address: 
current_stimulus.base, - write_data: ~64'b0, + // axi length 0 is 1 transfer (+1) + write_length: 8'b0, + // 2^3 = 8 bytes in a transfer + write_size: 3'b011, + write_data: 64'hffff_ffff_ffff_ffff, + did_irq: current_stimulus.do_irq }); end @@ -219,7 +283,8 @@ module tb_idma_desc64_top #( task apply_stimuli(); fork regbus_slave_interaction(); - regbus_master_apply_reads_and_writes(); + axi_master_apply_read_channel(); + axi_master_apply_write_channel(); backend_tx_done_notifier(); backend_acceptor(); join @@ -227,9 +292,8 @@ module tb_idma_desc64_top #( task collect_responses(); fork - regbus_master_acquire_reads(); - regbus_master_acquire_writes_and_irqs(); - backend_submission_monitor(); + axi_master_aquire_ars(); + axi_master_acquire_aw_w_and_irqs(); acquire_bursts(); join endtask @@ -246,24 +310,15 @@ module tb_idma_desc64_top #( automatic logic error; wait (generated_stimuli.size() > '0); + current_stimulus_group = generated_stimuli.pop_front(); - i_reg_iface_driver.send_read( - .addr (IDMA_DESC64_STATUS_OFFSET), - .data (status) , + i_reg_iface_driver.send_write( + .addr (IDMA_DESC64_DESC_ADDR_OFFSET) , + .data (current_stimulus_group[0].base), + .strb (8'hff) , .error(error) ); - if ((status & 64'b10) == 64'b0) begin - // the fifos are not full yet, so we can submit - current_stimulus_group = generated_stimuli.pop_front(); - - i_reg_iface_driver.send_write( - .addr (IDMA_DESC64_DESC_ADDR_OFFSET) , - .data (current_stimulus_group[0].base), - .strb (8'hff) , - .error(error) - ); - inflight_stimuli.push_back(current_stimulus_group); - end + inflight_stimuli.push_back(current_stimulus_group); end endtask @@ -293,7 +348,7 @@ module tb_idma_desc64_top #( flags[2:1] = stim.burst.opt.src.burst; flags[4:3] = stim.burst.opt.dst.burst; flags[5] = stim.burst.opt.beo.decouple_rw; - flags[6] = '0; + flags[6] = 1'b0; // flags[6] = stim.burst.opt.beo.serialize; flags[7] = stim.burst.opt.beo.src_reduce_len; flags[11:8] = stim.burst.opt.src.cache; @@ -305,96 +360,89 @@ module 
tb_idma_desc64_top #( result[63:32] = flags; return result; endfunction + + task axi_master_apply_write_channel(); + @(posedge rst_n); + forever begin + automatic ax_beat_t aw_beat; + automatic w_beat_t w_beat; + automatic b_beat_t b_beat; + // receive and acknowledge all writes + @(posedge clk); + i_axi_iface_driver.recv_aw(aw_beat); + @(posedge clk); + i_axi_iface_driver.recv_w(w_beat); + // all writes succeed + b_beat = new; + b_beat.b_id = aw_beat.ax_id; + @(posedge clk); + i_axi_iface_driver.send_b(b_beat); + end + endtask + // regbus master interaction read and write application (we're acting as slave) - task regbus_master_apply_reads_and_writes(); + task axi_master_apply_read_channel(); automatic stimulus_t current_stimulus_group[$]; automatic stimulus_t current_stimulus; - automatic int read_index; @(posedge rst_n); - dma_master_response.ready = '0; - dma_master_response.rdata = '0; - dma_master_response.error = '0; wait (inflight_stimuli.size() > 0); current_stimulus_group = inflight_stimuli.pop_front(); current_stimulus = current_stimulus_group.pop_front(); forever begin - automatic addr_t read_addr; - automatic logic [63:0] read_result; - + automatic ax_beat_t ar_beat; + automatic r_beat_t r_beat; @(posedge clk); - #(APPL_DELAY); - dma_master_response.ready = 1'b0; + i_axi_iface_driver.recv_ar(ar_beat); - wait (dma_master_request.valid); - @(posedge clk) - #(APPL_DELAY); - if (!dma_master_request.write) begin - // we have read everything from this stimulus packet, go to the - // next one - if (read_index == 4) begin - // get the next transfer group if we are done with the current group - if (current_stimulus_group.size() == '0) begin - wait (inflight_stimuli.size() > '0); - current_stimulus_group = inflight_stimuli.pop_front(); - end + // send the descriptor + r_beat = new; + r_beat.r_id = ar_beat.ax_id; + r_beat.r_data = stimulus_to_flag_bits(current_stimulus); + i_axi_iface_driver.send_r(r_beat); - current_stimulus = current_stimulus_group.pop_front(); 
- read_index = 0; - end + if (current_stimulus_group.size() == '0) begin + r_beat.r_data = 64'hffff_ffff_ffff_ffff; + end else begin + r_beat.r_data = current_stimulus_group[0].base; + end + i_axi_iface_driver.send_r(r_beat); - case (read_index) - 0: begin : flags_and_length - dma_master_response.rdata = stimulus_to_flag_bits(current_stimulus); - end : flags_and_length - 1: begin : next - if (current_stimulus_group.size() == '0) begin - dma_master_response.rdata = ~64'b0; - end else begin - dma_master_response.rdata = current_stimulus_group[0].base; - end - end : next - 2: begin : src - dma_master_response.rdata = current_stimulus.burst.src_addr; - end : src - 3: begin : dst - dma_master_response.rdata = current_stimulus.burst.dst_addr; - end : dst - default: begin - $error("The regbus master block reached an inconsistent state (%d)", read_index); - end - endcase - ++read_index; + r_beat.r_data = current_stimulus.burst.src_addr; + i_axi_iface_driver.send_r(r_beat); + + r_beat.r_data = current_stimulus.burst.dst_addr; + r_beat.r_last = 'b1; + i_axi_iface_driver.send_r(r_beat); + + // get the next transfer group if we are done with the current group + if (current_stimulus_group.size() == '0) begin + wait (inflight_stimuli.size() > '0); + current_stimulus_group = inflight_stimuli.pop_front(); end - dma_master_response.ready = 1'b1; + + current_stimulus = current_stimulus_group.pop_front(); end endtask - task regbus_master_acquire_reads(); - automatic int read_index = '0; - automatic result_t current_result; + task axi_master_aquire_ars(); @(posedge rst_n); forever begin - // wait for a read request - forever begin - @(posedge clk); - #(ACQ_DELAY); - if (dma_master_request.valid && - dma_master_response.ready && - !dma_master_request.write) break; - end - current_result.read_addresses[read_index] = dma_master_request.addr; - read_index++; - if (read_index == 4) begin - read_index = 0; - inflight_results_after_reads.push_back(current_result); - end + automatic ax_beat_t 
ar_beat; + automatic result_t current_result; + // monitor ar + i_axi_iface_driver.mon_ar(ar_beat); + // and record contents + current_result.read_address = ar_beat.ax_addr; + current_result.read_length = ar_beat.ax_len; + current_result.read_size = ar_beat.ax_size; + inflight_results_after_reads.push_back(current_result); end endtask - task regbus_master_acquire_writes_and_irqs(); + task axi_master_acquire_aw_w_and_irqs(); // set to one to skip first submission of what would be an invalid result automatic bit captured_irq = '1; automatic result_t current_result; @@ -402,14 +450,22 @@ module tb_idma_desc64_top #( wait (inflight_results_submitted_to_be.size() > 0); current_result = inflight_results_submitted_to_be.pop_front(); forever begin - forever begin - @(posedge clk); - #(ACQ_DELAY); - if ((dma_master_request.valid && - dma_master_response.ready && - dma_master_request.write) || - irq) break; - end + automatic ax_beat_t aw_beat; + automatic w_beat_t w_beat; + @(posedge clk); + // wait for either an irq or an aw_beat + fork + forever begin + #(ACQ_DELAY); + if (irq) begin + break; + end + @(posedge clk); + end + i_axi_iface_driver.mon_aw(aw_beat); + join_any + disable fork; + if (irq) begin if (captured_irq) begin $error("Got a duplicate IRQ!"); @@ -429,42 +485,31 @@ module tb_idma_desc64_top #( wait (inflight_results_submitted_to_be.size() > 0); current_result = inflight_results_submitted_to_be.pop_front(); end - current_result.write_address = dma_master_request.addr; - current_result.write_data = dma_master_request.wdata; + current_result.write_address = aw_beat.ax_addr; + current_result.write_length = aw_beat.ax_len; + current_result.write_size = aw_beat.ax_size; captured_irq = 1'b0; + i_axi_iface_driver.mon_w(w_beat); + current_result.write_data = w_beat.w_data; end end endtask - task backend_submission_monitor(); - @(posedge rst_n); - forever begin - forever begin - @(posedge clk); - #(ACQ_DELAY); - if (dma_be_valid && dma_be_ready) break; - end - // 
annotate that a job has entered the backend - inflight_be_tokens.push_back(1'b1); - end - endtask - task backend_tx_done_notifier(); @(posedge rst_n); forever begin - wait (inflight_be_tokens.size() > 0); - - // remove token, as we handled the request - void'(inflight_be_tokens.pop_front()); + wait (backend_busy); rand_wait(5, 20, clk); #(APPL_DELAY); - dma_be_tx_complete = 1'b1; + dma_be_rsp_valid = 1'b1; + wait (dma_be_rsp_ready); @(posedge clk); #(APPL_DELAY); - dma_be_tx_complete = 1'b0; + dma_be_rsp_valid = 1'b0; + backend_busy = 1'b0; end endtask @@ -476,7 +521,7 @@ module tb_idma_desc64_top #( forever begin @(posedge clk); #(ACQ_DELAY); - if (dma_be_valid && dma_be_ready) break; + if (dma_be_req_valid && dma_be_req_ready) break; end current_burst = dma_be_req; wait (inflight_results_after_reads.size() > 0); @@ -487,27 +532,39 @@ module tb_idma_desc64_top #( endtask task backend_acceptor(); - automatic result_t current_result; @(posedge rst_n); forever begin - wait (dma_be_valid); + wait (!backend_busy); @(posedge clk); #(APPL_DELAY) - dma_be_ready = 1'b1; + dma_be_req_ready = 1'b1; + #(ACQ_DELAY - APPL_DELAY); + forever begin + if (dma_be_req_valid) begin + break; + end + @(posedge clk); + #(ACQ_DELAY); + end @(posedge clk); #(APPL_DELAY) - dma_be_ready = 1'b0; + dma_be_req_ready = 1'b0; + backend_busy = 1'b1; end endtask // score the results initial begin : proc_scoring - static logic finished_simulation = '0; + static logic finished_simulation = 1'b0; static int number_of_descriptors = 0; - static int read_errors = 0; + static int read_addr_errors = 0; + static int read_length_errors = 0; + static int read_size_errors = 0; static int write_addr_errors = 0; + static int write_length_errors = 0; static int write_data_errors = 0; + static int write_size_errors = 0; static int burst_errors = 0; static int irq_errors = 0; @@ -536,18 +593,36 @@ module tb_idma_desc64_top #( number_of_descriptors, golden.burst, actual.burst); ++burst_errors; end - foreach 
(golden.read_addresses[i]) begin - if (golden.read_addresses[i] !== actual.read_addresses[i]) begin - $error("Read address mismatch @ %d:\ngolden: %x\nactual: %x", - number_of_descriptors, golden.read_addresses[i], actual.read_addresses[i]); - ++read_errors; - end + if (golden.read_address !== actual.read_address) begin + $error("Read address mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.read_address, actual.read_address); + ++read_addr_errors; + end + if (golden.read_length !== actual.read_length) begin + $error("Read length mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.read_length, actual.read_length); + ++read_length_errors; + end + if (golden.read_size !== actual.read_size) begin + $error("Read size mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.read_size, actual.read_size); + ++read_size_errors; end if (golden.write_address !== actual.write_address) begin $error("Write address mismatch @ %d:\ngolden: %x\nactual: %x", number_of_descriptors, golden.write_address, actual.write_address); ++write_addr_errors; end + if (golden.write_length !== actual.write_length) begin + $error("Write length mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.write_length, actual.write_length); + ++write_length_errors; + end + if (golden.write_size !== actual.write_size) begin + $error("Write size mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.write_size, actual.write_size); + ++write_size_errors; + end if (golden.write_data !== actual.write_data) begin $error("Write data mismatch @ %d:\ngolden: %x\nactual: %x", number_of_descriptors, golden.write_data, actual.write_data); @@ -571,7 +646,7 @@ module tb_idma_desc64_top #( ); if (status[0] != 1'b1) break; end - finished_simulation = 1; + finished_simulation = 1'b1; end : scorer join_any disable fork; @@ -580,8 +655,13 @@ module tb_idma_desc64_top #( end else begin $display("Simulation finished in a timely 
manner."); end - $display("Read address errors: %d", read_errors); + $display("Saw %d descriptors." , number_of_descriptors); + $display("Read address errors: %d", read_addr_errors); + $display("Read length errors: %d", read_length_errors); + $display("Read size errors: %d", read_size_errors); $display("Write address errors: %d", write_addr_errors); + $display("Write length errors: %d", write_length_errors); + $display("Write size errors: %d", write_size_errors); $display("Write data errors: %d", write_data_errors); $display("Burst errors: %d", burst_errors); $display("IRQ errors: %d", irq_errors); diff --git a/test/tb_idma_backend.sv b/test/tb_idma_backend.sv index 8219c3b2..db705500 100644 --- a/test/tb_idma_backend.sv +++ b/test/tb_idma_backend.sv @@ -7,6 +7,7 @@ `timescale 1ns/1ns `include "axi/typedef.svh" `include "idma/typedef.svh" +`include "idma/tracer.svh" module tb_idma_backend import idma_pkg::*; #( parameter int unsigned BufferDepth = 3, @@ -25,7 +26,8 @@ module tb_idma_backend import idma_pkg::*; #( parameter bit HardwareLegalizer = 1, parameter bit RejectZeroTransfers = 1, parameter bit ErrorHandling = 1, - parameter bit IdealMemory = 1 + parameter bit IdealMemory = 1, + parameter bit DmaTracing = 0 ); // timing parameters @@ -105,6 +107,21 @@ module tb_idma_backend import idma_pkg::*; #( idma_busy_t busy; + //-------------------------------------- + // DMA Tracer + //-------------------------------------- + // only activate tracer if requested + if (DmaTracing) begin + // fetch the name of the trace file from CMD line + string trace_file; + initial begin + void'($value$plusargs("trace_file=%s", trace_file)); + end + // attach the tracer + `IDMA_TRACER(i_idma_backend, trace_file); + end + + //-------------------------------------- // DMA Driver //-------------------------------------- diff --git a/test/tb_idma_nd_backend.sv b/test/tb_idma_nd_backend.sv index be82339e..7f32808f 100644 --- a/test/tb_idma_nd_backend.sv +++ b/test/tb_idma_nd_backend.sv 
@@ -7,6 +7,7 @@ `timescale 1ns/1ns `include "axi/typedef.svh" `include "idma/typedef.svh" +`include "idma/tracer.svh" module tb_idma_nd_backend import idma_pkg::*; #( parameter int unsigned BufferDepth = 3, @@ -28,7 +29,8 @@ module tb_idma_nd_backend import idma_pkg::*; #( parameter bit HardwareLegalizer = 1, parameter bit RejectZeroTransfers = 1, parameter bit ErrorHandling = 1, - parameter bit IdealMemory = 1 + parameter bit IdealMemory = 1, + parameter bit DmaTracing = 0 ); // timing parameters @@ -123,6 +125,21 @@ module tb_idma_nd_backend import idma_pkg::*; #( idma_busy_t busy; + //-------------------------------------- + // DMA Tracer + //-------------------------------------- + // only activate tracer if requested + if (DmaTracing) begin + // fetch the name of the trace file from CMD line + string trace_file; + initial begin + void'($value$plusargs("trace_file=%s", trace_file)); + end + // attach the tracer + `IDMA_TRACER(i_idma_backend, trace_file); + end + + //-------------------------------------- // DMA Driver //-------------------------------------- diff --git a/util/trace.py b/util/trace.py new file mode 100644 index 00000000..b74c8563 --- /dev/null +++ b/util/trace.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +# Copyright 2022 ETH Zurich and University of Bologna. +# Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+# SPDX-License-Identifier: SHL-0.51 + +# Author: Thomas Benz + +"""Functions used to parse and evaluate iDMA trace files.""" +import ast +import sys +from pprint import pprint as pp + + +def strb_to_bytes(strobe: int) -> int: + """Returns the amount of valid bytes in a strobe value""" + + res = 0 + + # iterate over strobe + for byte_en in str(bin(strobe))[2:]: + if byte_en == '1': + res += 1 + + return res + + +def read_trace (fn: str) -> list: + """Reads a trace file and returns it as a list of dict objects""" + + # resulting list of trace events + trace = [] + + # read and parse file + with open(fn, 'r', encoding='utf8') as tf: + for line in tf: + trace_dict = ast.literal_eval(line) + trace.append(trace_dict) + + return trace + + +def extract_parameter (trace: list) -> dict: + """Extracts the parameter of the DMA backend the run resulted from""" + + return trace[0]['cnst'] + + +def get_global_utilization (trace: list, data_width: int) -> list: + """Calculates the global utilization [read, write] of the DMA""" + + read_data = 0 # in bytes + write_data = 0 # in bytes + + for ele in trace: + # add read contribution + if ele['axib']['r_ready'] and ele['axib']['r_valid']: + read_data += data_width // 8 + + # add write contribution + if ele['axib']['w_ready'] and ele['axib']['w_valid']: + write_data += strb_to_bytes(ele['axib']['w_strb']) + + # calculate maximum possible amount of data + max_data = len(trace) * data_width // 8 + + return [read_data / max_data, write_data / max_data ] + + +if __name__ == '__main__': + _, filename = sys.argv + idma_trace = read_trace(filename) + idma_data_width = extract_parameter(idma_trace)['data_width'] + pp(get_global_utilization(idma_trace, idma_data_width)) diff --git a/verilator/scripts/preprocess.py b/verilator/scripts/preprocess.py index c1a208c3..45453b3d 100644 --- a/verilator/scripts/preprocess.py +++ b/verilator/scripts/preprocess.py @@ -9,7 +9,9 @@ import sys WHITE_LIST = ['fifo_v3', 'stream_fifo', 'spill_register', 
'popcount', 'stream_fork', 'fifo_v2', - 'axi_pkg', 'cf_math', 'fall_through_register', 'idma_', '+define+', '+incdir+'] + 'axi_pkg', 'cf_math', 'fall_through_register', 'idma_', '+define+', '+incdir+', 'dma_desc', + 'prim_subreg', 'axi_mux', 'axi_to', 'axi_lite_to', 'axi_id', 'rr_arb_', 'lzc', 'stream_', + 'axi_atop', 'axi_burst', 'id_queue', 'axi_demux', 'axi_err', 'common_cells'] _, inp_file = sys.argv