Skip to content

Commit

Permalink
Mark output ports with m_ for ASIC pin placement
Browse files Browse the repository at this point in the history
  • Loading branch information
Aba committed Sep 13, 2023
1 parent 89dd249 commit e4b7930
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 53 deletions.
2 changes: 1 addition & 1 deletion asic/scripts/initialFloorplan.tcl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Floorplan
floorPlan -r 0.4 0.85 10.0 10.0 10.0 10.0
floorPlan -r 0.33 0.85 10.0 10.0 10.0 10.0

timeDesign -preplace -prefix preplace

Expand Down
14 changes: 7 additions & 7 deletions fpga/scripts/vivado.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ set SCRIPTS_DIR ../scripts
source $SCRIPTS_DIR/vivado_config.tcl

#Board specific
# source $SCRIPTS_DIR/pynq_z2.tcl
source $SCRIPTS_DIR/pynq_z2.tcl
# source $SCRIPTS_DIR/zcu102.tcl
source $SCRIPTS_DIR/zcu104.tcl
# source $SCRIPTS_DIR/zcu104.tcl


# CREATE IPs
Expand Down Expand Up @@ -75,11 +75,11 @@ connect_bd_net [get_bd_pins $PS_CLK] [get_bd_pins dnn_engine_0/aclk]
connect_bd_intf_net [get_bd_intf_pins dma_pixels/M_AXIS_MM2S] [get_bd_intf_pins dnn_engine_0/s_axis_pixels]
connect_bd_intf_net [get_bd_intf_pins dma_weights/M_AXIS_MM2S] [get_bd_intf_pins dnn_engine_0/s_axis_weights]
connect_bd_net [get_bd_pins dnn_engine_0/aresetn] [get_bd_pins axi_smc/aresetn]
connect_bd_net [get_bd_pins dnn_engine_0/done_fill] [get_bd_pins xlconcat_0/In2]
connect_bd_net [get_bd_pins axi_bram_ctrl/bram_addr_a] [get_bd_pins dnn_engine_0/bram_addr_a]
connect_bd_net [get_bd_pins axi_bram_ctrl/bram_rddata_a] [get_bd_pins dnn_engine_0/bram_rddata_a]
connect_bd_net [get_bd_pins axi_bram_ctrl/bram_en_a] [get_bd_pins dnn_engine_0/bram_en_a]
connect_bd_net [get_bd_pins axi_gpio_out/gpio_io_o] [get_bd_pins dnn_engine_0/t_done_proc]
connect_bd_net [get_bd_pins dnn_engine_0/m_done_fill] [get_bd_pins xlconcat_0/In2]
connect_bd_net [get_bd_pins axi_bram_ctrl/bram_addr_a] [get_bd_pins dnn_engine_0/m_ram_addr_a]
connect_bd_net [get_bd_pins axi_bram_ctrl/bram_rddata_a] [get_bd_pins dnn_engine_0/m_ram_rddata_a]
connect_bd_net [get_bd_pins axi_bram_ctrl/bram_en_a] [get_bd_pins dnn_engine_0/m_ram_en_a]
connect_bd_net [get_bd_pins axi_gpio_out/gpio_io_o] [get_bd_pins dnn_engine_0/m_t_done_proc]

validate_bd_design

Expand Down
20 changes: 10 additions & 10 deletions rtl/dnn_engine.v
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ module dnn_engine #(
input wire [S_WEIGHTS_WIDTH_LF -1:0] s_axis_weights_tdata,
input wire [S_WEIGHTS_WIDTH_LF/8-1:0] s_axis_weights_tkeep,

input wire [(OUT_ADDR_WIDTH+2)-1:0] bram_addr_a,
output wire [ OUT_BITS -1:0] bram_rddata_a,
input wire bram_en_a,
output wire done_fill,
input wire t_done_proc
input wire [(OUT_ADDR_WIDTH+2)-1:0] m_ram_addr_a,
output wire [ OUT_BITS -1:0] m_ram_rddata_a,
input wire m_ram_en_a,
output wire m_done_fill,
input wire m_t_done_proc
);

localparam TUSER_WIDTH = `TUSER_WIDTH;
Expand Down Expand Up @@ -131,11 +131,11 @@ module dnn_engine #(
.s_data (out_s_data ),
.s_last (out_s_last ),

.bram_addr_a (bram_addr_a ),
.bram_rddata_a(bram_rddata_a ),
.bram_en_a (bram_en_a ),
.done_fill (done_fill ),
.t_done_proc (t_done_proc )
.m_ram_addr_a (m_ram_addr_a ),
.m_ram_rddata_a (m_ram_rddata_a),
.m_ram_en_a (m_ram_en_a ),
.m_done_fill (m_done_fill ),
.m_t_done_proc (m_t_done_proc )
);
endmodule

Expand Down
42 changes: 19 additions & 23 deletions rtl/out_ram_switch.sv
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ module out_ram_switch #(
input logic [ROWS -1:0][Y_BITS -1:0] s_data,
input logic s_valid, s_last,

input logic [(ADDR_WIDTH+2)-1:0] bram_addr_a,
output logic [ WORD_WIDTH -1:0] bram_rddata_a,
input logic bram_en_a,
input logic [(ADDR_WIDTH+2)-1:0] m_ram_addr_a,
output logic [ WORD_WIDTH -1:0] m_ram_rddata_a,
input logic m_ram_en_a,

output logic done_fill,
input logic t_done_proc
output logic m_done_fill,
input logic m_t_done_proc
);

localparam BITS_COLS = $clog2(COLS), BITS_ROWS = $clog2(ROWS);
Expand Down Expand Up @@ -91,32 +91,32 @@ module out_ram_switch #(
// -----
// READ
// -----
// 1. fw starts, waits for t_done_fill to toggle
// 2. mod toggles t_done_fill, moving to READ_S, waits for t_done_proc
// 3. fw continues, finishes processing, toggles t_done_proc
// 4. mod senses t_done_proc in READ_S, moves, waits for done_write, toggles t_done_fill
// 5. fw loops to beginning, waits for t_done_fill to toggle
// 1. fw starts, waits for m_done_fill to pulse (high for one clock, used as interrupt)
// 2. mod pulses m_done_fill in R_DONE_FILL_S, moves to R_READ_S, waits for m_t_done_proc to toggle
// 3. fw continues, finishes processing, toggles m_t_done_proc
// 4. mod senses the m_t_done_proc toggle in R_READ_S, moves on, waits for done_write, pulses m_done_fill again
// 5. fw loops to beginning, waits for the next m_done_fill pulse

always_comb
unique case (state_read)
R_IDLE_S : if (done_write [i_read]) state_read_next = R_DONE_FILL_S;
R_DONE_FILL_S: state_read_next = R_READ_S;
R_READ_S : if (dp_prev != t_done_proc) state_read_next = R_WAIT_S;
R_READ_S : if (dp_prev != m_t_done_proc) state_read_next = R_WAIT_S;
R_WAIT_S : state_read_next = R_SWITCH_S;
R_SWITCH_S : state_read_next = R_IDLE_S;
endcase

assign ram_r_addr = bram_addr_a[(ADDR_WIDTH+2)-1:2];
assign bram_rddata_a = WORD_WIDTH'(signed'(ram_dout[i_read])); // pad to 32
assign done_fill = state_read == R_DONE_FILL_S; // one clock for interrupt
assign ram_r_addr = m_ram_addr_a[(ADDR_WIDTH+2)-1:2];
assign m_ram_rddata_a = WORD_WIDTH'(signed'(ram_dout[i_read])); // pad to 32
assign m_done_fill = state_read == R_DONE_FILL_S; // one clock for interrupt

// always_ff @(posedge clk)
// if (!rstn) t_done_fill <= 0;
// else if (state_read == R_DONE_FILL_S) t_done_fill <= !t_done_fill;
// if (!rstn) t_m_done_fill <= 0;
// else if (state_read == R_DONE_FILL_S) t_m_done_fill <= !t_m_done_fill;

always_ff @(posedge clk)
if (!rstn) dp_prev <= 0; // t_done_proc starts at 0
else if (state_read_next == R_WAIT_S) dp_prev <= t_done_proc; // sample dp_prev at end of reading
if (!rstn) dp_prev <= 0; // m_t_done_proc starts at 0
else if (state_read_next == R_WAIT_S) dp_prev <= m_t_done_proc; // sample dp_prev at end of reading

// -----
// PING PONG
Expand All @@ -140,11 +140,7 @@ module out_ram_switch #(
assign ram_addr [i] = (i == i_write && state_write == W_WRITE_S) ? ram_w_addr : ram_r_addr;

localparam RAM_ADDR_BITS = $clog2(COLS*ROWS);
ram_output #(
.DEPTH (COLS * ROWS),
.WIDTH (Y_BITS ),
.LATENCY (RAM_LATENCY)
) RAM (
ram_output RAM (
.clka (clk),
.ena (1'b1),
.wea (ram_wen [i] ),
Expand Down
22 changes: 10 additions & 12 deletions test/sv/dnn_engine_tb.sv
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

module dnn_engine_tb;

localparam DIR_PATH = `DIR_PATH;
localparam VALID_PROB = `VALID_PROB,
READY_PROB = `READY_PROB;

Expand Down Expand Up @@ -38,9 +37,9 @@ module dnn_engine_tb;
logic [S_WEIGHTS_WIDTH_LF/K_BITS-1:0][K_BITS-1:0] s_axis_weights_tdata;
logic [S_WEIGHTS_WIDTH_LF/8-1:0] s_axis_weights_tkeep;

bit bram_en_a, done_fill, t_done_proc;
logic [(OUT_ADDR_WIDTH+2)-1:0] bram_addr_a;
logic [ OUT_BITS -1:0] bram_rddata_a;
bit m_ram_en_a, m_done_fill, m_t_done_proc;
logic [(OUT_ADDR_WIDTH+2)-1:0] m_ram_addr_a;
logic [ OUT_BITS -1:0] m_ram_rddata_a;


dnn_engine pipe (.*);
Expand All @@ -51,12 +50,11 @@ module dnn_engine_tb;
DMA_M2S #(S_WEIGHTS_WIDTH_LF, VALID_PROB, 0) source_k (aclk, aresetn, s_axis_weights_tready, s_axis_weights_tvalid, s_axis_weights_tlast, s_axis_weights_tdata, s_axis_weights_tkeep);

bit y_done=0, x_done=0, w_done=0;
string w_path, x_path;
int w_offset=0, w_bpt=0, x_offset=0, x_bpt=0;

import "DPI-C" function void load_x(inout bit x_done, inout int x_offset, x_bpt);
import "DPI-C" function void load_w(inout bit w_done, inout int w_offset, w_bpt);
import "DPI-C" function void load_y(inout bit y_done, inout bit t_done_proc, inout bit [31:0] y_sram [ROWS*COLS-1:0]);
import "DPI-C" function void load_y(inout bit y_done, inout bit m_t_done_proc, inout bit [31:0] y_sram [ROWS*COLS-1:0]);
import "DPI-C" function void fill_memory();
import "DPI-C" function byte get_byte_wx (int addr, int mode);

Expand All @@ -82,20 +80,20 @@ module dnn_engine_tb;
// Y_SRAM
int file, y_wpt, dout;
initial begin
{bram_addr_a, bram_en_a, t_done_proc} = 0;
{m_ram_addr_a, m_ram_en_a, m_t_done_proc} = 0;
wait(aresetn);
repeat(2) @(posedge aclk);

while (!y_done) begin
wait (done_fill); // callback trigger
wait (m_done_fill); // callback trigger

for (int unsigned ir=0; ir < ROWS*COLS; ir++) begin // DPI-C cannot consume time in verilator, so read in advance
bram_addr_a <= ir*(OUT_BITS/8); // 4 byte words
bram_en_a <= 1;
m_ram_addr_a <= ir*(OUT_BITS/8); // 4 byte words
m_ram_en_a <= 1;
repeat(2) @(posedge aclk) #1ps;
y_sram[ir] = bram_rddata_a;
y_sram[ir] = m_ram_rddata_a;
end
load_y(y_done, t_done_proc, y_sram);
load_y(y_done, m_t_done_proc, y_sram);
end
end

Expand Down

0 comments on commit e4b7930

Please sign in to comment.