From 2af8d3a6fef5ceec2319052df398f15d7732978f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Tue, 21 Nov 2023 20:20:58 +0000 Subject: [PATCH 01/10] AXI stream data width converter for integer ratios. --- finn-rtllib/dwc/hdl/dwc_axi.sv | 158 ++++++++++++++++++++++++ finn-rtllib/dwc/sim/dwc_axi_tb.sv | 195 ++++++++++++++++++++++++++++++ 2 files changed, 353 insertions(+) create mode 100644 finn-rtllib/dwc/hdl/dwc_axi.sv create mode 100644 finn-rtllib/dwc/sim/dwc_axi_tb.sv diff --git a/finn-rtllib/dwc/hdl/dwc_axi.sv b/finn-rtllib/dwc/hdl/dwc_axi.sv new file mode 100644 index 0000000000..ea52b9ed24 --- /dev/null +++ b/finn-rtllib/dwc/hdl/dwc_axi.sv @@ -0,0 +1,158 @@ +/****************************************************************************** + * Copyright (C) 2023, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @brief AXI Stream Data Width Converter. + * @author Thomas B. Preußer + *****************************************************************************/ +module dwc_axi #( + int unsigned IBITS, + int unsigned OBITS +)( + //- Global Control ------------------ + input logic clk, + input logic rst, + + //- AXI Stream - Input -------------- + output logic s_axis_tready, + input logic s_axis_tvalid, + input logic [IBITS-1:0] s_axis_tdata, + + //- AXI Stream - Output ------------- + input logic m_axis_tready, + output logic m_axis_tvalid, + output logic [OBITS-1:0] m_axis_tdata +); + + if(IBITS == OBITS) begin : genNoop + assign s_axis_tready = m_axis_tready; + assign m_axis_tvalid = s_axis_tvalid; + assign m_axis_tdata = s_axis_tdata; + end : genNoop + else if(IBITS < OBITS) begin : genUp + + // Sanity Checking: integer upscaling + initial begin + if(OBITS % IBITS) begin + $error("Output width %0d is not a multiple of input width %0d.", OBITS, IBITS); + $finish; + end + end + + // Parallelizing Shift Register A and Sidestep Buffer B on Input Path + localparam int unsigned K = OBITS / IBITS; + typedef logic [IBITS-1:0] dat_t; + dat_t [K-1:0] ADat = 'x; + logic [$clog2(K):0] ACnt = K-1; // (empty) K-1, ..., 0, -1 (full/valid) + dat_t BDat = 'x; + logic BRdy = 1; + always_ff @(posedge clk) begin + if(rst) begin + ADat <= 'x; + ACnt <= K-1; + BDat <= 'x; + BRdy <= 1; + end + else begin + automatic type(ACnt) acnt = (m_axis_tvalid && m_axis_tready)? K-1 : ACnt; + automatic logic rdy = !m_axis_tvalid || m_axis_tready; + if((s_axis_tvalid || !BRdy) && rdy) begin + ADat <= { BRdy? s_axis_tdata : BDat, ADat[K-1:1] }; + acnt--; + end + ACnt <= acnt; + + if(BRdy) BDat <= s_axis_tdata; + BRdy <= rdy || (BRdy && !s_axis_tvalid); + end + end + + // Output Assignments + assign s_axis_tready = BRdy; + assign m_axis_tvalid = ACnt[$left(ACnt)]; + assign m_axis_tdata = ADat; + + end : genUp + else begin : genDown + + // Sanity Checking: integer downscaling + initial begin + if(IBITS % OBITS) begin + $error("Input width %0d is not a multiple of output width %0d.", IBITS, OBITS); + $finish; + end + end + + // Serializing Shift Register A and Sidestep Buffer B on Output Path + localparam int unsigned K = IBITS / OBITS; + typedef logic [OBITS-1:0] dat_t; + dat_t [ K-1:0] ADat = 'x; + logic [$clog2(K):0] ACnt = 1; // (full) -K+1, ..., -1, 0, 1 (empty/not valid) + dat_t BDat = 'x; + logic BRdy = 1; + dat_t CDat = 'x; + logic CVld = 0; + always_ff @(posedge clk) begin + if(rst) begin + ADat <= 'x; + ACnt <= 1; + BDat <= 'x; + BRdy <= 1; + CDat <= 'x; + CVld <= 0; + end + else begin + automatic type(ACnt) acnt = ACnt; + automatic logic ainc = 0; + if(s_axis_tready) begin + ADat <= s_axis_tdata; + acnt = s_axis_tvalid? -K+1 : 1; + end + else if(BRdy) begin + ADat <= { {OBITS{1'bx}}, ADat[K-1:1] }; + ainc = BRdy; + end; + ACnt <= acnt + ainc; + + if(BRdy) BDat <= ADat[0]; + BRdy <= !CVld || m_axis_tready || (BRdy && !ACnt[$left(ACnt)] && ACnt[0]); + + if(!CVld || m_axis_tready) CDat <= BRdy? ADat[0] : BDat; + CVld <= (CVld && !m_axis_tready) || !BRdy || ACnt[$left(ACnt)] || !ACnt[0]; + end + end + + // Output Assignments + assign s_axis_tready = BRdy && !ACnt[$left(ACnt)]; + assign m_axis_tvalid = CVld; + assign m_axis_tdata = CDat; + + end : genDown + +endmodule : dwc_axi diff --git a/finn-rtllib/dwc/sim/dwc_axi_tb.sv b/finn-rtllib/dwc/sim/dwc_axi_tb.sv new file mode 100644 index 0000000000..6bc3249685 --- /dev/null +++ b/finn-rtllib/dwc/sim/dwc_axi_tb.sv @@ -0,0 +1,195 @@ +/****************************************************************************** + * Copyright (C) 2023, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @brief Testbench for AXI Stream Data Width Converter. + * @author Thomas B. Preußer + *****************************************************************************/ +module dwc_axi_tb; + + localparam int unsigned DBITS = 4; + localparam int unsigned K = 3; + typedef logic [DBITS-1:0] dat_t; + + // Global Control + logic clk = 0; + always #5ns clk = !clk; + logic rst = 1; + initial begin + repeat(8) @(posedge clk); + rst <= 0; + end + + if(1) begin : blkUp + localparam int unsigned IBITS = DBITS; + localparam int unsigned OBITS = K * DBITS; + + //- AXI Stream - Input -------------- + uwire s_axis_tready; + logic s_axis_tvalid; + dat_t s_axis_tdata; + + //- AXI Stream - Output ------------- + logic m_axis_tready; + uwire m_axis_tvalid; + dat_t [K-1:0] m_axis_tdata; + + dwc_axi #(.IBITS(IBITS), .OBITS(OBITS)) dut ( + .clk, .rst, + .s_axis_tready, .s_axis_tvalid, .s_axis_tdata, + .m_axis_tready, .m_axis_tvalid, .m_axis_tdata + ); + + // Stimulus: Feed + dat_t Q[$]; + initial begin + s_axis_tvalid = 0; + s_axis_tdata = 'x; + @(posedge clk iff !rst); + + repeat(57600) begin + automatic type(s_axis_tdata) dat; + std::randomize(dat); + + while($urandom()%7 < 2) @(posedge clk); + + s_axis_tvalid <= 1; + s_axis_tdata <= dat; + @(posedge clk iff s_axis_tready); + Q.push_back(dat); + + s_axis_tvalid <= 0; + s_axis_tdata <= 'x; + end + + repeat(16) @(posedge clk); + $finish; + end + + // Output Sink + initial begin + m_axis_tready = 0; + @(posedge clk iff !rst); + + forever begin + automatic dat_t [K-1:0] dat; + + while($urandom()%9 < 1) @(posedge clk); + + m_axis_tready <= 1; + @(posedge clk iff m_axis_tvalid); + assert(Q.size >= K) else begin + $error("Spurious output."); + $stop; + end + for(int unsigned i = 0; i < K; i++) dat[i] = Q.pop_front(); + assert(m_axis_tdata == dat) else begin + $error("Output mismatch."); + $stop; + end + + m_axis_tready <= 0; + end + end + end : blkUp + + if(1) begin : blkDown + localparam int unsigned IBITS = K * DBITS; + localparam int unsigned OBITS = DBITS; + + //- AXI Stream - Input -------------- + uwire s_axis_tready; + logic s_axis_tvalid; + dat_t [K-1:0] s_axis_tdata; + + //- AXI Stream - Output ------------- + logic m_axis_tready; + uwire m_axis_tvalid; + dat_t m_axis_tdata; + + dwc_axi #(.IBITS(IBITS), .OBITS(OBITS)) dut ( + .clk, .rst, + .s_axis_tready, .s_axis_tvalid, .s_axis_tdata, + .m_axis_tready, .m_axis_tvalid, .m_axis_tdata + ); + + // Stimulus: Feed + dat_t Q[$]; + initial begin + s_axis_tvalid = 0; + s_axis_tdata = 'x; + @(posedge clk iff !rst); + + repeat(57600) begin + automatic dat_t [K-1:0] dat; + std::randomize(dat); + + while($urandom()%7 < 2) @(posedge clk); + + s_axis_tvalid <= 1; + s_axis_tdata <= dat; + @(posedge clk iff s_axis_tready); + for(int unsigned i = 0; i < K; i++) Q.push_back(dat[i]); + + s_axis_tvalid <= 0; + s_axis_tdata <= 'x; + end + + repeat(16) @(posedge clk); + $finish; + end + + // Output Sink + initial begin + m_axis_tready = 0; + @(posedge clk iff !rst); + + forever begin + automatic dat_t dat; + + while($urandom()%9 < 1) @(posedge clk); + + m_axis_tready <= 1; + @(posedge clk iff m_axis_tvalid); + assert(Q.size) else begin + $error("Spurious output."); + $stop; + end + dat = Q.pop_front(); + assert(m_axis_tdata == dat) else begin + $error("Output mismatch: 0x%0x instead of 0x%0x", m_axis_tdata, dat); + $stop; + end + + m_axis_tready <= 0; + end + end + end : blkDown + +endmodule : dwc_axi_tb From a25f5d469668fec173db6c212324a7d49e0247d2 Mon Sep 17 00:00:00 2001 From: auphelia Date: Wed, 22 Nov 2023 11:32:07 +0000 Subject: [PATCH 02/10] [rtllib] Rename clk, rst in dwc module and first draft of verilog wrapper --- finn-rtllib/dwc/hdl/dwc_axi.sv | 12 +++--- finn-rtllib/dwc/hdl/dwc_template.v | 66 ++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 6 deletions(-) create mode 100644 finn-rtllib/dwc/hdl/dwc_template.v diff --git a/finn-rtllib/dwc/hdl/dwc_axi.sv b/finn-rtllib/dwc/hdl/dwc_axi.sv index ea52b9ed24..5381b57ac4 100644 --- a/finn-rtllib/dwc/hdl/dwc_axi.sv +++ b/finn-rtllib/dwc/hdl/dwc_axi.sv @@ -36,8 +36,8 @@ module dwc_axi #( int unsigned OBITS )( //- Global Control ------------------ - input logic clk, - input logic rst, + input logic ap_clk, + input logic ap_rst_n, //- AXI Stream - Input -------------- output logic s_axis_tready, @@ -72,8 +72,8 @@ module dwc_axi #( logic [$clog2(K):0] ACnt = K-1; // (empty) K-1, ..., 0, -1 (full/valid) dat_t BDat = 'x; logic BRdy = 1; - always_ff @(posedge clk) begin - if(rst) begin + always_ff @(posedge ap_clk) begin + if(ap_rst_n) begin ADat <= 'x; ACnt <= K-1; BDat <= 'x; @@ -118,8 +118,8 @@ module dwc_axi #( logic BRdy = 1; dat_t CDat = 'x; logic CVld = 0; - always_ff @(posedge clk) begin - if(rst) begin + always_ff @(posedge ap_clk) begin + if(ap_rst_n) begin ADat <= 'x; ACnt <= 1; BDat <= 'x; diff --git a/finn-rtllib/dwc/hdl/dwc_template.v b/finn-rtllib/dwc/hdl/dwc_template.v new file mode 100644 index 0000000000..27f6c70dff --- /dev/null +++ b/finn-rtllib/dwc/hdl/dwc_template.v @@ -0,0 +1,66 @@ +/****************************************************************************** + * Copyright (C) 2023, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *****************************************************************************/ + +module $TOP_MODULE_NAME$( +//- Global Control ------------------ +(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *) +input ap_clk, +input ap_rst_n, + +//- AXI Stream - Input -------------- +output in0_V_TREADY, +input in0_V_TVALID, +input [$IBITS$-1:0] in0_V_TDATA, + +//- AXI Stream - Output ------------- +input out_V_TREADY, +output out_V_TVALID, +output [$OBITS$-1:0] out_V_TDATA +); + + +dwc_axi #( +.IBITS($IBITS$), +.OBITS($OBITS$) +) +$TOP_MODULE_NAME$_impl +( + .ap_clk(ap_clk), + .ap_rst_n(ap_rst_n), + .s_axis_tready(in0_V_TREADY), + .s_axis_tvalid(in0_V_TVALID), + .s_axis_tdata(in0_V_TDATA), + .m_axis_tready(out_V_TREADY), + .m_axis_tvalid(out_V_TVALID), + .m_axis_tdata(out_V_TDATA) +); + +endmodule From 048317a70cd782ad34148bddac94752bb88ecf1e Mon Sep 17 00:00:00 2001 From: auphelia Date: Wed, 22 Nov 2023 11:34:02 +0000 Subject: [PATCH 03/10] [CustomOp] Initial draft of custom op for dwc rtl component --- src/finn/custom_op/fpgadataflow/__init__.py | 4 + .../streamingdatawidthconverter_rtl.py | 359 ++++++++++++++++++ 2 files changed, 363 insertions(+) create mode 100644 src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_rtl.py diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index 56d4230a3a..c120667d81 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -56,6 +56,9 @@ from finn.custom_op.fpgadataflow.streamingdatawidthconverter_batch import ( StreamingDataWidthConverter_Batch, ) +from finn.custom_op.fpgadataflow.streamingdatawidthconverter_rtl import ( + StreamingDataWidthConverter_rtl, +) from finn.custom_op.fpgadataflow.streamingfifo import StreamingFIFO from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch from finn.custom_op.fpgadataflow.thresholding_batch import Thresholding_Batch @@ -75,6 +78,7 @@ custom_op["ConvolutionInputGenerator_rtl"] = ConvolutionInputGenerator_rtl custom_op["TLastMarker"] = TLastMarker custom_op["StreamingDataWidthConverter_Batch"] = StreamingDataWidthConverter_Batch +custom_op["StreamingDataWidthConverter_rtl"] = StreamingDataWidthConverter_rtl custom_op["StreamingFIFO"] = StreamingFIFO custom_op["GlobalAccPool_Batch"] = GlobalAccPool_Batch custom_op["Pool_Batch"] = Pool_Batch diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_rtl.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_rtl.py new file mode 100644 index 0000000000..e89bfd2526 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_rtl.py @@ -0,0 +1,359 @@ +# Copyright (C) 2023, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +import os +import shutil +import warnings +from qonnx.core.datatype import DataType + +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp +from finn.util.basic import get_rtlsim_trace_depth, make_build_dir +from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy + +try: + from pyverilator import PyVerilator +except ModuleNotFoundError: + PyVerilator = None + + +class StreamingDataWidthConverter_rtl(HLSCustomOp): + """Class that corresponds to finn-rtllib datawidth converter + module.""" + + def get_nodeattr_types(self): + my_attrs = { + # shape of input/output tensors + "shape": ("ints", True, []), + # bit width of input and output streams + "inWidth": ("i", True, 0), + "outWidth": ("i", True, 0), + # FINN DataTypes for inputs/outputs + "dataType": ("s", True, ""), + # attribute to save top module name - not user configurable + "gen_top_module": ("s", False, ""), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def get_input_datatype(self, ind=0): + """Returns FINN DataType of input.""" + return DataType[self.get_nodeattr("dataType")] + + def get_output_datatype(self, ind=0): + """Returns FINN DataType of output.""" + return DataType[self.get_nodeattr("dataType")] + + def get_normal_input_shape(self, ind=0): + ishape = self.get_nodeattr("shape") + return ishape + + def get_normal_output_shape(self, ind=0): + oshape = self.get_nodeattr("shape") + return oshape + + def check_divisible_iowidths(self): + iwidth = self.get_nodeattr("inWidth") + owidth = self.get_nodeattr("outWidth") + # the rtl module only supports + # stream widths that are divisible by + # integer width ratios + iwidth_d = iwidth % owidth == 0 + owidth_d = owidth % iwidth == 0 + assert ( + iwidth_d or owidth_d + ), """RTL implementation of DWC requires + stream widths that are integer width ratios + from each other. Input width is set to %s + and output width is set to %s """ % ( + iwidth, + owidth, + ) + + def get_folded_input_shape(self, ind=0): + self.check_divisible_iowidths() + iwidth = self.get_nodeattr("inWidth") + ishape = self.get_normal_input_shape() + dummy_t = np.random.randn(*ishape) + ibits = self.get_input_datatype().bitwidth() + assert ( + iwidth % ibits == 0 + ), """DWC input width must be divisible by + input element bitwidth""" + ielems = int(iwidth // ibits) + ichannels = ishape[-1] + new_shape = [] + for i in ishape[:-1]: + new_shape.append(i) + new_shape.append(int(ichannels // ielems)) + new_shape.append(ielems) + dummy_t = dummy_t.reshape(new_shape) + return dummy_t.shape + + def get_folded_output_shape(self, ind=0): + self.check_divisible_iowidths() + owidth = self.get_nodeattr("outWidth") + oshape = self.get_normal_output_shape() + dummy_t = np.random.randn(*oshape) + obits = self.get_output_datatype().bitwidth() + assert ( + owidth % obits == 0 + ), """DWC output width must be divisible by + input element bitwidth""" + oelems = int(owidth // obits) + ochannels = oshape[-1] + new_shape = [] + for i in oshape[:-1]: + new_shape.append(i) + new_shape.append(int(ochannels // oelems)) + new_shape.append(oelems) + dummy_t = dummy_t.reshape(new_shape) + + return dummy_t.shape + + def get_number_output_values(self): + folded_oshape = self.get_folded_output_shape() + return np.prod(folded_oshape[:-1]) + + def get_instream_width(self, ind=0): + in_width = self.get_nodeattr("inWidth") + return in_width + + def get_outstream_width(self, ind=0): + out_width = self.get_nodeattr("outWidth") + return out_width + + def make_shape_compatible_op(self, model): + exp_ishape = self.get_normal_input_shape() + oshape = self.get_normal_output_shape() + ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) + assert ishape == tuple(exp_ishape), "Unexpect input shape for StreamingDWC." + return super().make_const_shape_op(oshape) + + def infer_node_datatype(self, model): + node = self.onnx_node + idt = model.get_tensor_datatype(node.input[0]) + if idt != self.get_input_datatype(): + warn_str = "inputDataType changing for %s: %s -> %s " % ( + node.name, + str(self.get_input_datatype()), + str(idt), + ) + warnings.warn(warn_str) + self.set_nodeattr("dataType", idt.name) + # data type stays the same + model.set_tensor_datatype(node.output[0], idt) + + def verify_node(self): + pass + + def execute_node(self, context, graph): + mode = self.get_nodeattr("exec_mode") + node = self.onnx_node + exp_ishape = self.get_normal_input_shape() + exp_oshape = self.get_normal_output_shape() + folded_ishape = self.get_folded_input_shape() + + if mode == "cppsim": + raise Exception( + """cppsim not possible for StreamingDataWidthConverter_rtl, + please set exec_mode to rtlsim""" + ) + elif mode == "rtlsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + inp = context[node.input[0]] + assert str(inp.dtype) == "float32", "Input datatype is not float32" + assert inp.shape == tuple( + exp_ishape + ), """Input shape doesn't + match expected shape.""" + export_idt = self.get_input_datatype() + + reshaped_input = inp.reshape(folded_ishape) + np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input) + + sim = self.get_rtlsim() + nbits = self.get_instream_width() + rtlsim_inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), export_idt, nbits) + super().reset_rtlsim(sim) + super().toggle_clk(sim) + rtlsim_output = self.rtlsim(sim, rtlsim_inp) + odt = export_idt + target_bits = odt.bitwidth() + packed_bits = self.get_outstream_width() + out_npy_path = "{}/output.npy".format(code_gen_dir) + out_shape = self.get_folded_output_shape() + rtlsim_output_to_npy(rtlsim_output, out_npy_path, odt, out_shape, packed_bits, target_bits) + # load and reshape output + output = np.load(out_npy_path) + output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape) + context[node.output[0]] = output + + assert context[node.output[0]].shape == tuple( + exp_oshape + ), """Output shape doesn't match expected shape.""" + + def get_template_values(self): + topname = self.get_verilog_top_module_name() + ibits = self.get_instream_width() + obits = self.get_outstream_width() + code_gen_dict = { + "IBITS": int(ibits), + "OBITS": int(obits), + "TOP_MODULE_NAME": topname, + } + return code_gen_dict + + def generate_hdl(self): + rtlsrc = os.environ["FINN_ROOT"] + "/finn-rtllib/dwc/hdl" + template_path = rtlsrc + "/dwc_template.v" + code_gen_dict = self.get_template_values() + # save top module name so we can refer to it after this node has been renamed + # (e.g. by GiveUniqueNodeNames(prefix) during MakeZynqProject) + self.set_nodeattr("gen_top_module", self.get_verilog_top_module_name()) + + # apply code generation to templates + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + with open(template_path, "r") as f: + template = f.read() + for key_name in code_gen_dict: + key = "$%s$" % key_name + template = template.replace(key, str(code_gen_dict[key_name])) + + with open( + os.path.join(code_gen_dir, self.get_verilog_top_module_name() + ".v"), + "w", + ) as f: + f.write(template) + + sv_files = ["dwc_axi.sv"] + for sv_file in sv_files: + shutil.copy(rtlsrc + "/" + sv_file, code_gen_dir) + # set ipgen_path and ip_path so that HLS-Synth transformation + # and stich_ip transformation do not complain + self.set_nodeattr("ipgen_path", code_gen_dir) + self.set_nodeattr("ip_path", code_gen_dir) + + def prepare_rtlsim(self): + """Creates a Verilator emulation library for the RTL code generated + for this node, sets the rtlsim_so attribute to its path and returns + a PyVerilator wrapper around it.""" + # Modified to use generated (System-)Verilog instead of HLS output products + + if PyVerilator is None: + raise ImportError("Installation of PyVerilator is required.") + + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + verilog_paths = [code_gen_dir] + verilog_files = [ + "dwc_axi.sv", + self.get_nodeattr("gen_top_module") + ".v", + ] + + # build the Verilator emu library + sim = PyVerilator.build( + verilog_files, + build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"), + verilog_path=verilog_paths, + trace_depth=get_rtlsim_trace_depth(), + top_module_name=self.get_verilog_top_module_name(), + ) + # save generated lib filename in attribute + self.set_nodeattr("rtlsim_so", sim.lib._name) + return sim + + def code_generation_ipi(self): + """Constructs and returns the TCL for node instantiation in Vivado IPI.""" + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + + sourcefiles = [ + "dwc_axi.sv", + self.get_nodeattr("gen_top_module") + ".v", + ] + + sourcefiles = [os.path.join(code_gen_dir, f) for f in sourcefiles] + + cmd = [] + for f in sourcefiles: + cmd += ["add_files -norecurse %s" % (f)] + cmd += [ + "create_bd_cell -type module -reference %s %s" + % (self.get_nodeattr("gen_top_module"), self.onnx_node.name) + ] + return cmd + + def code_generation_ipgen(self, model, fpgapart, clk): + """Normally: Generates C++ code and tcl script for IP generation. + Here: Generates (System-)Verilog code for IP generation.""" + self.generate_hdl() + + def ipgen_singlenode_code(self): + """Normally: Builds the bash script for IP generation.""" + pass + + def code_generation_cppsim(self, model): + """Normally: Generates C++ code for simulation (cppsim).""" + pass + + def compile_singlenode_code(self): + pass + + def global_includes(self): + pass + + def defines(self, var): + pass + + def read_npy_data(self): + pass + + def strm_decl(self): + pass + + def docompute(self): + pass + + def dataoutstrm(self): + pass + + def save_as_npy(self): + pass + + def blackboxfunction(self): + pass + + def pragmas(self): + pass From 0d01a86e0b9b824b811972110d690479d70aead0 Mon Sep 17 00:00:00 2001 From: auphelia Date: Wed, 22 Nov 2023 12:04:58 +0000 Subject: [PATCH 04/10] [Transformation] Extend InsertDWC to derive rtl variant when selected --- .../transformation/fpgadataflow/insert_dwc.py | 35 ++++++++++++++----- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/insert_dwc.py b/src/finn/transformation/fpgadataflow/insert_dwc.py index 140d154b1a..fb21cc822d 100644 --- a/src/finn/transformation/fpgadataflow/insert_dwc.py +++ b/src/finn/transformation/fpgadataflow/insert_dwc.py @@ -1,3 +1,4 @@ +import warnings from onnx import TensorProto from onnx import helper as oh from qonnx.custom_op.registry import getCustomOp @@ -33,8 +34,9 @@ def _suitable_node(node): class InsertDWC(Transformation): """Add data width converters between layers where necessary.""" - def __init__(self): + def __init__(self, use_rtl_variant=False): super().__init__() + self.use_rtl_variant = use_rtl_variant def apply(self, model): graph = model.graph @@ -80,11 +82,20 @@ def apply(self, model): dwc_in_width = n0.get_outstream_width() # determine dwc outwidth dwc_out_width = n1.get_instream_width() - # use hls mode by default since it supports more configs - # vivado mode can be manually enabled by user, but does not - # support e.g. node-by-node rtlsim neded for - # characterization-based FIFO sizing - impl_style = "hls" + if self.use_rtl_variant: + # check if rtl variant can be used + iwidth_d = dwc_in_width % dwc_out_width == 0 + owidth_d = dwc_out_width % dwc_in_width == 0 + if iwidth_d or owidth_d: + node_optype = "StreamingDataWidthConverter_rtl" + else: + warnings.warn( + "DWC cannot be implemented as RTL variant, default to hls" + ) + node_optype = "StreamingDataWidthConverter_Batch" + self.use_rtl_variant = False + else: + node_optype = "StreamingDataWidthConverter_Batch" # determine shape for dwc dwc_shape = n0.get_normal_output_shape() @@ -100,7 +111,7 @@ def apply(self, model): graph.value_info.append(dwc_output_tensor) dwc_node = oh.make_node( - "StreamingDataWidthConverter_Batch", + node_optype, [output_name], [dwc_output_tensor.name], domain="finn.custom_op.fpgadataflow", @@ -109,8 +120,16 @@ def apply(self, model): inWidth=dwc_in_width, outWidth=dwc_out_width, dataType=str(dtype.name), - impl_style=impl_style, ) + # if not rtl variant is selected + # use hls mode by default since it supports more configs + # vivado mode can be manually enabled by user, but does not + # support e.g. node-by-node rtlsim neded for + # characterization-based FIFO sizing + if not self.use_rtl_variant: + impl_attr = oh.make_attribute("impl_style", "hls") + dwc_node.attribute.append(impl_attr) + # insert dwc graph.node.insert(node_ind + 1, dwc_node) From b1fcf88843752eb47c87204aa69a5640797c41e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Wed, 22 Nov 2023 13:57:18 +0000 Subject: [PATCH 05/10] Fix clock association and polarity of reset. --- finn-rtllib/dwc/hdl/dwc_axi.sv | 10 +++--- finn-rtllib/dwc/hdl/dwc_template.v | 55 +++++++++++++++--------------- finn-rtllib/dwc/sim/dwc_axi_tb.sv | 4 +-- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/finn-rtllib/dwc/hdl/dwc_axi.sv b/finn-rtllib/dwc/hdl/dwc_axi.sv index 5381b57ac4..7aa915289f 100644 --- a/finn-rtllib/dwc/hdl/dwc_axi.sv +++ b/finn-rtllib/dwc/hdl/dwc_axi.sv @@ -49,6 +49,8 @@ module dwc_axi #( output logic m_axis_tvalid, output logic [OBITS-1:0] m_axis_tdata ); + uwire clk = ap_clk; + uwire rst = !ap_rst_n; if(IBITS == OBITS) begin : genNoop assign s_axis_tready = m_axis_tready; @@ -72,8 +74,8 @@ module dwc_axi #( logic [$clog2(K):0] ACnt = K-1; // (empty) K-1, ..., 0, -1 (full/valid) dat_t BDat = 'x; logic BRdy = 1; - always_ff @(posedge ap_clk) begin - if(ap_rst_n) begin + always_ff @(posedge clk) begin + if(rst) begin ADat <= 'x; ACnt <= K-1; BDat <= 'x; @@ -118,8 +120,8 @@ module dwc_axi #( logic BRdy = 1; dat_t CDat = 'x; logic CVld = 0; - always_ff @(posedge ap_clk) begin - if(ap_rst_n) begin + always_ff @(posedge clk) begin + if(rst) begin ADat <= 'x; ACnt <= 1; BDat <= 'x; diff --git a/finn-rtllib/dwc/hdl/dwc_template.v b/finn-rtllib/dwc/hdl/dwc_template.v index 27f6c70dff..9541913c9f 100644 --- a/finn-rtllib/dwc/hdl/dwc_template.v +++ b/finn-rtllib/dwc/hdl/dwc_template.v @@ -30,37 +30,36 @@ *****************************************************************************/ module $TOP_MODULE_NAME$( -//- Global Control ------------------ -(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *) -input ap_clk, -input ap_rst_n, + //- Global Control ------------------ + (* X_INTERFACE_INFO = "xilinx.com:signal:clock:1.0 ap_clk CLK" *) + (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V, ASSOCIATED_RESET ap_rst_n" *) + input ap_clk, + (* X_INTERFACE_PARAMETER = "POLARITY ACTIVE_LOW" *) + input ap_rst_n, -//- AXI Stream - Input -------------- -output in0_V_TREADY, -input in0_V_TVALID, -input [$IBITS$-1:0] in0_V_TDATA, + //- AXI Stream - Input -------------- + output in0_V_TREADY, + input in0_V_TVALID, + input [$IBITS$-1:0] in0_V_TDATA, -//- AXI Stream - Output ------------- -input out_V_TREADY, -output out_V_TVALID, -output [$OBITS$-1:0] out_V_TDATA + //- AXI Stream - Output ------------- + input out_V_TREADY, + output out_V_TVALID, + output [$OBITS$-1:0] out_V_TDATA ); - -dwc_axi #( -.IBITS($IBITS$), -.OBITS($OBITS$) -) -$TOP_MODULE_NAME$_impl -( - .ap_clk(ap_clk), - .ap_rst_n(ap_rst_n), - .s_axis_tready(in0_V_TREADY), - .s_axis_tvalid(in0_V_TVALID), - .s_axis_tdata(in0_V_TDATA), - .m_axis_tready(out_V_TREADY), - .m_axis_tvalid(out_V_TVALID), - .m_axis_tdata(out_V_TDATA) -); + dwc_axi #( + .IBITS($IBITS$), + .OBITS($OBITS$) + ) impl ( + .ap_clk(ap_clk), + .ap_rst_n(ap_rst_n), + .s_axis_tready(in0_V_TREADY), + .s_axis_tvalid(in0_V_TVALID), + .s_axis_tdata(in0_V_TDATA), + .m_axis_tready(out_V_TREADY), + .m_axis_tvalid(out_V_TVALID), + .m_axis_tdata(out_V_TDATA) + ); endmodule diff --git a/finn-rtllib/dwc/sim/dwc_axi_tb.sv b/finn-rtllib/dwc/sim/dwc_axi_tb.sv index 6bc3249685..b47e5b2f83 100644 --- a/finn-rtllib/dwc/sim/dwc_axi_tb.sv +++ b/finn-rtllib/dwc/sim/dwc_axi_tb.sv @@ -61,7 +61,7 @@ module dwc_axi_tb; dat_t [K-1:0] m_axis_tdata; dwc_axi #(.IBITS(IBITS), .OBITS(OBITS)) dut ( - .clk, .rst, + .ap_clk(clk), .ap_rst_n(!rst), .s_axis_tready, .s_axis_tvalid, .s_axis_tdata, .m_axis_tready, .m_axis_tvalid, .m_axis_tdata ); @@ -134,7 +134,7 @@ module dwc_axi_tb; dat_t m_axis_tdata; dwc_axi #(.IBITS(IBITS), .OBITS(OBITS)) dut ( - .clk, .rst, + .ap_clk(clk), .ap_rst_n(!rst), .s_axis_tready, .s_axis_tvalid, .s_axis_tdata, .m_axis_tready, .m_axis_tvalid, .m_axis_tdata ); From 2ad42b06e297a2e4b504a92f83111ec20ee01a5a Mon Sep 17 00:00:00 2001 From: auphelia Date: Wed, 22 Nov 2023 15:12:49 +0000 Subject: [PATCH 06/10] [Test] Extend dwc testing to test rtl variant of node --- tests/fpgadataflow/test_fpgadataflow_dwc.py | 31 +++++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_dwc.py b/tests/fpgadataflow/test_fpgadataflow_dwc.py index eb6e0651d9..f3302132af 100644 --- a/tests/fpgadataflow/test_fpgadataflow_dwc.py +++ b/tests/fpgadataflow/test_fpgadataflow_dwc.py @@ -1,4 +1,5 @@ -# Copyright (c) 2020, Xilinx +# Copyright (C) 2020-2022, Xilinx, Inc. +# Copyright (C) 2023, Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -41,12 +42,17 @@ from finn.transformation.fpgadataflow.prepare_ip import PrepareIP -def make_single_dwc_modelwrapper(shape, inWidth, outWidth, finn_dtype, impl_style): +def make_single_dwc_modelwrapper(shape, inWidth, outWidth, finn_dtype, impl_style, use_rtl_variant): inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, shape) outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, shape) + if use_rtl_variant: + optype = "StreamingDataWidthConverter_rtl" + else: + optype = "StreamingDataWidthConverter_Batch" + DWC_node = helper.make_node( - "StreamingDataWidthConverter_Batch", + optype, ["inp"], ["outp"], domain="finn.custom_op.fpgadataflow", @@ -55,8 +61,12 @@ def make_single_dwc_modelwrapper(shape, inWidth, outWidth, finn_dtype, impl_styl inWidth=inWidth, outWidth=outWidth, dataType=str(finn_dtype.name), - impl_style=impl_style, + rtlsim_trace="dwc.vcd", ) + if not use_rtl_variant: + # add additional attribute + impl_attr = helper.make_attribute("impl_style", impl_style) + DWC_node.attribute.append(impl_attr) graph = helper.make_graph(nodes=[DWC_node], name="dwc_graph", inputs=[inp], outputs=[outp]) @@ -85,18 +95,27 @@ def prepare_inputs(input_tensor, dt): ([1, 2, 8], 8, 16, DataType["INT2"], "vivado"), ], ) +@pytest.mark.parametrize("use_rtl_variant", [0, 1]) @pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado -def test_fpgadataflow_dwc_rtlsim(config): +def test_fpgadataflow_dwc_rtlsim(config, use_rtl_variant): shape, inWidth, outWidth, finn_dtype, impl_style = config + + if use_rtl_variant: + iwidth_d = inWidth % outWidth == 0 + owidth_d = outWidth % inWidth == 0 + if not (iwidth_d or owidth_d): + pytest.skip("RTL variant only supports stream widths that are divisible by int ratios") test_fpga_part = "xc7z020clg400-1" target_clk_ns = 10.0 # generate input data x = gen_finn_dt_tensor(finn_dtype, shape) input_dict = prepare_inputs(x, finn_dtype) - model = make_single_dwc_modelwrapper(shape, inWidth, outWidth, finn_dtype, impl_style) + model = make_single_dwc_modelwrapper( + shape, inWidth, outWidth, finn_dtype, impl_style, use_rtl_variant + ) model = model.transform(InsertFIFO(create_shallow_fifos=True)) model = model.transform(GiveUniqueNodeNames()) model = model.transform(PrepareIP(test_fpga_part, 5)) From 64175ca2c8fb6f9b15b0f116570f2e7e4301e842 Mon Sep 17 00:00:00 2001 From: auphelia Date: Wed, 22 Nov 2023 15:18:54 +0000 Subject: [PATCH 07/10] [Transformation] Use RTL DWC by default --- src/finn/transformation/fpgadataflow/insert_dwc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/finn/transformation/fpgadataflow/insert_dwc.py b/src/finn/transformation/fpgadataflow/insert_dwc.py index fb21cc822d..bf0254c1a7 100644 --- a/src/finn/transformation/fpgadataflow/insert_dwc.py +++ b/src/finn/transformation/fpgadataflow/insert_dwc.py @@ -34,7 +34,7 @@ def _suitable_node(node): class InsertDWC(Transformation): """Add data width converters between layers where necessary.""" - def __init__(self, use_rtl_variant=False): + def __init__(self, use_rtl_variant=True): super().__init__() self.use_rtl_variant = use_rtl_variant From 9fd482e7d3a340b5e48bc83841b6f5daefdb22b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Fri, 24 Nov 2023 16:51:14 +0000 Subject: [PATCH 08/10] Extended AXI-lite data bus to next full byte boundary. --- finn-rtllib/dwc/hdl/dwc.sv | 158 +++++++++++++++++++++++++++++ finn-rtllib/dwc/hdl/dwc_axi.sv | 125 +++-------------------- finn-rtllib/dwc/hdl/dwc_template.v | 16 ++- finn-rtllib/dwc/sim/dwc_axi_tb.sv | 2 +- 4 files changed, 185 insertions(+), 116 deletions(-) create mode 100644 finn-rtllib/dwc/hdl/dwc.sv diff --git a/finn-rtllib/dwc/hdl/dwc.sv b/finn-rtllib/dwc/hdl/dwc.sv new file mode 100644 index 0000000000..13b0cb34c4 --- /dev/null +++ b/finn-rtllib/dwc/hdl/dwc.sv @@ -0,0 +1,158 @@ +/****************************************************************************** + * Copyright (C) 2023, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @brief Stream Data Width Converter. + * @author Thomas B. Preußer + *****************************************************************************/ +module dwc #( + int unsigned IBITS, + int unsigned OBITS +)( + //- Global Control ------------------ + input logic clk, + input logic rst, + + //- AXI Stream - Input -------------- + output logic irdy, + input logic ivld, + input logic [IBITS-1:0] idat, + + //- AXI Stream - Output ------------- + input logic ordy, + output logic ovld, + output logic [OBITS-1:0] odat +); + + if(IBITS == OBITS) begin : genNoop + assign irdy = ordy; + assign ovld = ivld; + assign odat = idat; + end : genNoop + else if(IBITS < OBITS) begin : genUp + + // Sanity Checking: integer upscaling + initial begin + if(OBITS % IBITS) begin + $error("Output width %0d is not a multiple of input width %0d.", OBITS, IBITS); + $finish; + end + end + + // Parallelizing Shift Register A and Sidestep Buffer B on Input Path + localparam int unsigned K = OBITS / IBITS; + typedef logic [IBITS-1:0] dat_t; + dat_t [K-1:0] ADat = 'x; + logic [$clog2(K):0] ACnt = K-1; // (empty) K-1, ..., 0, -1 (full/valid) + dat_t BDat = 'x; + logic BRdy = 1; + always_ff @(posedge clk) begin + if(rst) begin + ADat <= 'x; + ACnt <= K-1; + BDat <= 'x; + BRdy <= 1; + end + else begin + automatic type(ACnt) acnt = (ovld && ordy)? K-1 : ACnt; + automatic logic rdy = !ovld || ordy; + if((ivld || !BRdy) && rdy) begin + ADat <= { BRdy? idat : BDat, ADat[K-1:1] }; + acnt--; + end + ACnt <= acnt; + + if(BRdy) BDat <= idat; + BRdy <= rdy || (BRdy && !ivld); + end + end + + // Output Assignments + assign irdy = BRdy; + assign ovld = ACnt[$left(ACnt)]; + assign odat = ADat; + + end : genUp + else begin : genDown + + // Sanity Checking: integer downscaling + initial begin + if(IBITS % OBITS) begin + $error("Input width %0d is not a multiple of output width %0d.", IBITS, OBITS); + $finish; + end + end + + // Serializing Shift Register A and Sidestep Buffer B on Output Path + localparam int unsigned K = IBITS / OBITS; + typedef logic [OBITS-1:0] dat_t; + dat_t [ K-1:0] ADat = 'x; + logic [$clog2(K):0] ACnt = 1; // (full) -K+1, ..., -1, 0, 1 (empty/not valid) + dat_t BDat = 'x; + logic BRdy = 1; + dat_t CDat = 'x; + logic CVld = 0; + always_ff @(posedge clk) begin + if(rst) begin + ADat <= 'x; + ACnt <= 1; + BDat <= 'x; + BRdy <= 1; + CDat <= 'x; + CVld <= 0; + end + else begin + automatic type(ACnt) acnt = ACnt; + automatic logic ainc = 0; + if(irdy) begin + ADat <= idat; + acnt = ivld? -K+1 : 1; + end + else if(BRdy) begin + ADat <= { {OBITS{1'bx}}, ADat[K-1:1] }; + ainc = BRdy; + end; + ACnt <= acnt + ainc; + + if(BRdy) BDat <= ADat[0]; + BRdy <= !CVld || ordy || (BRdy && !ACnt[$left(ACnt)] && ACnt[0]); + + if(!CVld || ordy) CDat <= BRdy? ADat[0] : BDat; + CVld <= (CVld && !ordy) || !BRdy || ACnt[$left(ACnt)] || !ACnt[0]; + end + end + + // Output Assignments + assign irdy = BRdy && !ACnt[$left(ACnt)]; + assign ovld = CVld; + assign odat = CDat; + + end : genDown + +endmodule : dwc diff --git a/finn-rtllib/dwc/hdl/dwc_axi.sv b/finn-rtllib/dwc/hdl/dwc_axi.sv index 7aa915289f..dfe02fcb48 100644 --- a/finn-rtllib/dwc/hdl/dwc_axi.sv +++ b/finn-rtllib/dwc/hdl/dwc_axi.sv @@ -28,12 +28,15 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * @brief AXI Stream Data Width Converter. + * @brief AXI Stream Adapter for Data Width Converter. * @author Thomas B. Preußer *****************************************************************************/ module dwc_axi #( int unsigned IBITS, - int unsigned OBITS + int unsigned OBITS, + + localparam int unsigned AXI_IBITS = (IBITS+7)/8 * 8, + localparam int unsigned AXI_OBITS = (OBITS+7)/8 * 8 )( //- Global Control ------------------ input logic ap_clk, @@ -42,119 +45,21 @@ module dwc_axi #( //- AXI Stream - Input -------------- output logic s_axis_tready, input logic s_axis_tvalid, - input logic [IBITS-1:0] s_axis_tdata, + input logic [AXI_IBITS-1:0] s_axis_tdata, //- AXI Stream - Output ------------- input logic m_axis_tready, output logic m_axis_tvalid, - output logic [OBITS-1:0] m_axis_tdata + output logic [AXI_OBITS-1:0] m_axis_tdata ); - uwire clk = ap_clk; - uwire rst = !ap_rst_n; - - if(IBITS == OBITS) begin : genNoop - assign s_axis_tready = m_axis_tready; - assign m_axis_tvalid = s_axis_tvalid; - assign m_axis_tdata = s_axis_tdata; - end : genNoop - else if(IBITS < OBITS) begin : genUp - - // Sanity Checking: integer upscaling - initial begin - if(OBITS % IBITS) begin - $error("Output width %0d is not a multiple of input width %0d.", OBITS, IBITS); - $finish; - end - end - - // Parallelizing Shift Register A and Sidestep Buffer B on Input Path - localparam int unsigned K = OBITS / IBITS; - typedef logic [IBITS-1:0] dat_t; - dat_t [K-1:0] ADat = 'x; - logic [$clog2(K):0] ACnt = K-1; // (empty) K-1, ..., 0, -1 (full/valid) - dat_t BDat = 'x; - logic BRdy = 1; - always_ff @(posedge clk) begin - if(rst) begin - ADat <= 'x; - ACnt <= K-1; - BDat <= 'x; - BRdy <= 1; - end - else begin - automatic type(ACnt) acnt = (m_axis_tvalid && m_axis_tready)? K-1 : ACnt; - automatic logic rdy = !m_axis_tvalid || m_axis_tready; - if((s_axis_tvalid || !BRdy) && rdy) begin - ADat <= { BRdy? s_axis_tdata : BDat, ADat[K-1:1] }; - acnt--; - end - ACnt <= acnt; - - if(BRdy) BDat <= s_axis_tdata; - BRdy <= rdy || (BRdy && !s_axis_tvalid); - end - end - - // Output Assignments - assign s_axis_tready = BRdy; - assign m_axis_tvalid = ACnt[$left(ACnt)]; - assign m_axis_tdata = ADat; - - end : genUp - else begin : genDown - - // Sanity Checking: integer downscaling - initial begin - if(IBITS % OBITS) begin - $error("Input width %0d is not a multiple of output width %0d.", IBITS, OBITS); - $finish; - end - end - - // Serializing Shift Register A and Sidestep Buffer B on Output Path - localparam int unsigned K = IBITS / OBITS; - typedef logic [OBITS-1:0] dat_t; - dat_t [ K-1:0] ADat = 'x; - logic [$clog2(K):0] ACnt = 1; // (full) -K+1, ..., -1, 0, 1 (empty/not valid) - dat_t BDat = 'x; - logic BRdy = 1; - dat_t CDat = 'x; - logic CVld = 0; - always_ff @(posedge clk) begin - if(rst) begin - ADat <= 'x; - ACnt <= 1; - BDat <= 'x; - BRdy <= 1; - CDat <= 'x; - CVld <= 0; - end - else begin - automatic type(ACnt) acnt = ACnt; - automatic logic ainc = 0; - if(s_axis_tready) begin - ADat <= s_axis_tdata; - acnt = s_axis_tvalid? -K+1 : 1; - end - else if(BRdy) begin - ADat <= { {OBITS{1'bx}}, ADat[K-1:1] }; - ainc = BRdy; - end; - ACnt <= acnt + ainc; - - if(BRdy) BDat <= ADat[0]; - BRdy <= !CVld || m_axis_tready || (BRdy && !ACnt[$left(ACnt)] && ACnt[0]); - - if(!CVld || m_axis_tready) CDat <= BRdy? ADat[0] : BDat; - CVld <= (CVld && !m_axis_tready) || !BRdy || ACnt[$left(ACnt)] || !ACnt[0]; - end - end - - // Output Assignments - assign s_axis_tready = BRdy && !ACnt[$left(ACnt)]; - assign m_axis_tvalid = CVld; - assign m_axis_tdata = CDat; - end : genDown + dwc #(.IBITS(IBITS), .OBITS(OBITS)) core ( + .clk(ap_clk), .rst(!ap_rst_n), + .irdy(s_axis_tready), .ivld(s_axis_tvalid), .idat(s_axis_tdata[IBITS-1:0]), + .ordy(m_axis_tready), .ovld(m_axis_tvalid), .odat(m_axis_tdata[OBITS-1:0]) + ); + if(OBITS < AXI_OBITS) begin + assign m_axis_tdata[AXI_OBITS-1:OBITS] = '0; + end endmodule : dwc_axi diff --git a/finn-rtllib/dwc/hdl/dwc_template.v b/finn-rtllib/dwc/hdl/dwc_template.v index 9541913c9f..01a0254040 100644 --- a/finn-rtllib/dwc/hdl/dwc_template.v +++ b/finn-rtllib/dwc/hdl/dwc_template.v @@ -29,7 +29,13 @@ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *****************************************************************************/ -module $TOP_MODULE_NAME$( +module $TOP_MODULE_NAME$ #( + parameter IBITS = $IBITS$, + parameter OBITS = $OBITS$, + + parameter AXI_IBITS = (IBITS+7)/8 * 8, + parameter AXI_OBITS = (OBITS+7)/8 * 8 +)( //- Global Control ------------------ (* X_INTERFACE_INFO = "xilinx.com:signal:clock:1.0 ap_clk CLK" *) (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V, ASSOCIATED_RESET ap_rst_n" *) @@ -40,17 +46,17 @@ module $TOP_MODULE_NAME$( //- AXI Stream - Input -------------- output in0_V_TREADY, input in0_V_TVALID, - input [$IBITS$-1:0] in0_V_TDATA, + input [AXI_IBITS-1:0] in0_V_TDATA, //- AXI Stream - Output ------------- input out_V_TREADY, output out_V_TVALID, - output [$OBITS$-1:0] out_V_TDATA + output [AXI_OBITS-1:0] out_V_TDATA ); dwc_axi #( - .IBITS($IBITS$), - .OBITS($OBITS$) + .IBITS(IBITS), + .OBITS(OBITS) ) impl ( .ap_clk(ap_clk), .ap_rst_n(ap_rst_n), diff --git a/finn-rtllib/dwc/sim/dwc_axi_tb.sv b/finn-rtllib/dwc/sim/dwc_axi_tb.sv index b47e5b2f83..64435c1900 100644 --- a/finn-rtllib/dwc/sim/dwc_axi_tb.sv +++ b/finn-rtllib/dwc/sim/dwc_axi_tb.sv @@ -33,7 +33,7 @@ *****************************************************************************/ module dwc_axi_tb; - localparam int unsigned DBITS = 4; + localparam int unsigned DBITS = 8; localparam int unsigned K = 3; typedef logic [DBITS-1:0] dat_t; From 529c335fadd2d738dbb568e305a7c5834c42f714 Mon Sep 17 00:00:00 2001 From: auphelia Date: Fri, 24 Nov 2023 16:59:33 +0000 Subject: [PATCH 09/10] [DWC] Add additional sv file to list of files to copy --- .../custom_op/fpgadataflow/streamingdatawidthconverter_rtl.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_rtl.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_rtl.py index e89bfd2526..4f592bafaa 100644 --- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_rtl.py +++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_rtl.py @@ -259,7 +259,7 @@ def generate_hdl(self): ) as f: f.write(template) - sv_files = ["dwc_axi.sv"] + sv_files = ["dwc_axi.sv", "dwc.sv"] for sv_file in sv_files: shutil.copy(rtlsrc + "/" + sv_file, code_gen_dir) # set ipgen_path and ip_path so that HLS-Synth transformation @@ -280,6 +280,7 @@ def prepare_rtlsim(self): verilog_paths = [code_gen_dir] verilog_files = [ "dwc_axi.sv", + "dwc.sv", self.get_nodeattr("gen_top_module") + ".v", ] @@ -301,6 +302,7 @@ def code_generation_ipi(self): sourcefiles = [ "dwc_axi.sv", + "dwc.sv", self.get_nodeattr("gen_top_module") + ".v", ] From 9edcdc6eb23011e4b402fc0fdd9f8e5ed2bdb7d2 Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 28 Nov 2023 15:57:20 +0000 Subject: [PATCH 10/10] [Tests] Remove saving of waveform for dwc test --- tests/fpgadataflow/test_fpgadataflow_dwc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_dwc.py b/tests/fpgadataflow/test_fpgadataflow_dwc.py index f3302132af..47332f069b 100644 --- a/tests/fpgadataflow/test_fpgadataflow_dwc.py +++ b/tests/fpgadataflow/test_fpgadataflow_dwc.py @@ -61,7 +61,6 @@ def make_single_dwc_modelwrapper(shape, inWidth, outWidth, finn_dtype, impl_styl inWidth=inWidth, outWidth=outWidth, dataType=str(finn_dtype.name), - rtlsim_trace="dwc.vcd", ) if not use_rtl_variant: # add additional attribute