Skip to content

Commit 7f01c3c

Browse files
authored
Merge/stream synchronization (#349)
* Updated project_generation_scripts and tf_merge_streamer.vhd to synchronize bx with output, synchronize bx 0 with other bxs, and synchronize reading of memories by tf_merge_streamer
* Remove commented-out code and fix tabs
* Added optimizations to help meet timing: changed tf_merge_streamer and transitioned TPAR memories to URAM
* Incorporate suggestions from Jason (change if to reverse loop)
* Merged project_generation_scripts PR
1 parent c139d6d commit 7f01c3c

File tree

3 files changed

+191
-140
lines changed

3 files changed

+191
-140
lines changed

IntegrationTests/common/hdl/tf_mem.vhd

+3-2
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ entity tf_mem is
3636
INIT_HEX : boolean := true; --! Read init file in hex (default) or bin
3737
RAM_PERFORMANCE : string := "HIGH_PERFORMANCE";--! Select "HIGH_PERFORMANCE" (2 clk latency) or "LOW_LATENCY" (1 clk latency)
3838
NAME : string := "MEMNAME"; --! Name of mem for printout
39-
DEBUG : boolean := false --! If true prints debug info
39+
DEBUG : boolean := false; --! If true prints debug info
40+
MEM_TYPE : string := "block" --! specifies RAM type (block/ultra)
4041
);
4142
port (
4243
clka : in std_logic; --! Write clock
@@ -101,7 +102,7 @@ signal sv_RAM_row : std_logic_vector(RAM_WIDTH-1 downto 0) := (others =>'0');
101102

102103
-- ########################### Attributes ###########################
103104
attribute ram_style : string;
104-
attribute ram_style of sa_RAM_data : signal is "block";
105+
attribute ram_style of sa_RAM_data : signal is MEM_TYPE;
105106

106107
begin
107108

+187-137
Original file line numberDiff line numberDiff line change
@@ -1,137 +1,187 @@
1-
--===========================================================================
2-
--! @file
3-
--! @brief Module which reads and streams out the contents of the memories
4-
--! at the end of the first half of the TF algo.
5-
--! @author Jason Fan ([email protected])
6-
--! @date 2024-02-29
7-
--! @version v.1.0
8-
--===========================================================================
9-
10-
--! Standard library
11-
library ieee;
12-
--! Standard package
13-
use ieee.std_logic_1164.all;
14-
--! Signed/unsigned calculations
15-
use ieee.numeric_std.all;
16-
--! Standard functions
17-
library std;
18-
--! Standard TextIO functions
19-
use std.textio.all;
20-
21-
--! Xilinx library
22-
library unisim;
23-
--! Xilinx package
24-
use unisim.vcomponents.all;
25-
use work.tf_pkg.all;
26-
27-
entity tf_merge_streamer is
28-
generic (
29-
RAM_WIDTH : natural := 72;
30-
NUM_PAGES : natural := 8;
31-
RAM_DEPTH : natural := NUM_PAGES * PAGE_LENGTH;
32-
NUM_INPUTS : natural := 4;
33-
NUM_EXTRA_BITS: natural := 2;
34-
ADDR_WIDTH : natural := 7
35-
);
36-
port (
37-
bx_in : in std_logic_vector(2 downto 0 );
38-
rst: in std_logic;
39-
clk : in std_logic;
40-
--output read enable to tf_mem modules
41-
enb_arr: out std_logic_vector(NUM_INPUTS-1 downto 0);
42-
bx_out : out std_logic_vector(2 downto 0);
43-
--output merged stream, includes input word, up to 2 bits that encode the
44-
--original module, and a valid bit (from LSB to MSB)
45-
merged_dout : out std_logic_vector(RAM_WIDTH+NUM_EXTRA_BITS downto 0);
46-
--input data,nent and addresses are best suited for unconstrained arrays
47-
--but this is not supported in vivado 2019
48-
--module always accepts 4 input memories, but will not use all of them
49-
din0: in std_logic_vector(RAM_WIDTH-1 downto 0);
50-
din1: in std_logic_vector(RAM_WIDTH-1 downto 0);
51-
din2: in std_logic_vector(RAM_WIDTH-1 downto 0);
52-
din3: in std_logic_vector(RAM_WIDTH-1 downto 0);
53-
nent0: in t_arr_7b(0 to NUM_PAGES-1);
54-
nent1: in t_arr_7b(0 to NUM_PAGES-1);
55-
nent2: in t_arr_7b(0 to NUM_PAGES-1);
56-
nent3: in t_arr_7b(0 to NUM_PAGES-1);
57-
addr_arr: out std_logic_vector(NUM_INPUTS*CLOGB2(RAM_DEPTH)-1 downto 0)
58-
) ;
59-
end entity tf_merge_streamer;
60-
61-
architecture RTL of tf_merge_streamer is
62-
63-
constant MAX_INPUTS : integer := 4;
64-
constant pipe_stages : integer := 4;
65-
66-
type mem_count_arr is array(NUM_INPUTS-1 downto 0) of integer;
67-
type toread_arr is array(pipe_stages-1 downto 0) of integer;
68-
69-
--nent and din are repackaged from odd input type into
70-
--arrays
71-
type nent_array is array(MAX_INPUTS-1 downto 0) of t_arr_7b(0 to NUM_PAGES-1);
72-
type din_array is array(MAX_INPUTS-1 downto 0) of std_logic_vector(RAM_WIDTH-1 downto 0);
73-
74-
signal valid : std_logic_vector(pipe_stages-1 downto 0) := (others => '0');
75-
signal readmask : std_logic_vector(NUM_INPUTS-1 downto 0) := (others => '0');
76-
77-
begin
78-
process(clk)
79-
variable nent_arr: nent_array;
80-
variable din_arr: din_array;
81-
variable bx_last :integer :=0;
82-
variable mem_count : mem_count_arr := (others => 0);
83-
variable current_page: natural := 0;
84-
variable bx_change : boolean := false; -- indicates to the module whether or not the bx has changed compared to the previous clock
85-
86-
variable toread : toread_arr := (others => 0);
87-
88-
begin
89-
if rising_edge(clk) then
90-
nent_arr := (nent3,nent2,nent1,nent0); --repackage nent and din as arrays
91-
din_arr := (din3, din2, din1, din0);
92-
bx_change := (bx_last /= to_integer(unsigned(bx_in)));
93-
if (bx_change) then --reset with rst signal or a change in bx
94-
-- check if bx changes and update page to read from
95-
mem_count := (others => 0);
96-
end if ;
97-
current_page := to_integer(unsigned(bx_in)) mod NUM_PAGES;
98-
--check if memory read counter is less than nentries
99-
for i in 0 to NUM_INPUTS-1 loop
100-
if ((mem_count(i)) < to_integer(unsigned(nent_arr(i)(current_page)))) then
101-
readmask(i) <= '1';
102-
else
103-
readmask(i) <= '0';
104-
end if;
105-
end loop;
106-
if (to_integer(unsigned(readmask)) = 0) then
107-
valid(0) <= '0';
108-
else
109-
for j in 0 to NUM_INPUTS-1 loop
110-
if readmask((j + toread(0) + 1) mod NUM_INPUTS) = '1' then
111-
toread(0) := (j + toread(0) + 1 ) mod NUM_INPUTS;
112-
exit;
113-
end if;
114-
end loop;
115-
addr_arr(((toread(0)+1)*clogb2(RAM_DEPTH))-1 downto (toread(0))*clogb2(RAM_DEPTH)) <= std_logic_vector(to_unsigned(current_page*page_length + mem_count(toread(0)), clogb2(RAM_DEPTH)));
116-
valid(0) <= '1';
117-
mem_count(toread(0)) := mem_count(toread(0)) + 1;
118-
end if;
119-
120-
if valid(pipe_stages-1) ='1' then
121-
if (NUM_EXTRA_BITS > 0) then
122-
merged_dout <= '1' & std_logic_vector(to_unsigned(toread(pipe_stages-1),NUM_EXTRA_BITS)) & din_arr(toread(pipe_stages-1));
123-
else
124-
merged_dout <= '1' & din_arr(toread(pipe_stages-2));
125-
end if ;
126-
else
127-
merged_dout <= (others => '0');
128-
end if;
129-
bx_last := to_integer(unsigned(bx_in));
130-
bx_out <= bx_in;
131-
for j in 0 to pipe_stages-2 loop
132-
toread(j+1) := toread(j);
133-
valid(j+1) <= valid(j);
134-
end loop;
135-
end if;
136-
end process;
137-
end RTL;
1+
--===========================================================================
2+
--! @file
3+
--! @brief Module which reads and streams out the contents of the memories
4+
--! at the end of the first half of the TF algo.
5+
--! @author Jason Fan ([email protected])
6+
--! @date 2024-02-29
7+
--! @version v.1.1
8+
--===========================================================================
9+
10+
--! Standard library
11+
library ieee;
12+
--! Standard package
13+
use ieee.std_logic_1164.all;
14+
--! Signed/unsigned calculations
15+
use ieee.numeric_std.all;
16+
--! Standard functions
17+
library std;
18+
--! Standard TextIO functions
19+
use std.textio.all;
20+
21+
--! Xilinx library
22+
library unisim;
23+
--! Xilinx package
24+
use unisim.vcomponents.all;
25+
use work.tf_pkg.all;
26+
27+
entity tf_merge_streamer is
28+
generic (
29+
RAM_WIDTH: natural := 72;
30+
NUM_PAGES : natural := 8;
31+
RAM_DEPTH : natural := NUM_PAGES * PAGE_LENGTH;
32+
NUM_INPUTS : natural := 4;
33+
NUM_EXTRA_BITS: natural := 2;
34+
ADDR_WIDTH : natural := 7
35+
);
36+
port (
37+
bx_in : in std_logic_vector(2 downto 0 );
38+
bx_in_vld : in std_logic;
39+
rst: in std_logic;
40+
clk : in std_logic;
41+
--output read enable to tf_mem modules
42+
enb_arr: out std_logic_vector(NUM_INPUTS-1 downto 0);
43+
bx_out : out std_logic_vector(2 downto 0);
44+
--output merged stream, includes input word, up to 2 bits that encode the
45+
--original module, and a valid bit (from LSB to MSB)
46+
merged_dout : out std_logic_vector(RAM_WIDTH+NUM_EXTRA_BITS downto 0);
47+
--input data, nent and addresses are best suited for unconstrained arrays
48+
--but this is not supported in vivado 2019
49+
--module always has 4 input memory ports, but only inputs 0 to NUM_INPUTS-1 are read
50+
din0: in std_logic_vector(RAM_WIDTH-1 downto 0);
51+
din1: in std_logic_vector(RAM_WIDTH-1 downto 0);
52+
din2: in std_logic_vector(RAM_WIDTH-1 downto 0);
53+
din3: in std_logic_vector(RAM_WIDTH-1 downto 0);
54+
nent0: in t_arr_7b(0 to NUM_PAGES-1);
55+
nent1: in t_arr_7b(0 to NUM_PAGES-1);
56+
nent2: in t_arr_7b(0 to NUM_PAGES-1);
57+
nent3: in t_arr_7b(0 to NUM_PAGES-1);
58+
addr_arr: out std_logic_vector(NUM_INPUTS*CLOGB2(RAM_DEPTH)-1 downto 0)
59+
) ;
60+
end entity tf_merge_streamer;
61+
62+
architecture RTL of tf_merge_streamer is
63+
64+
constant MAX_INPUTS : integer := 4;
65+
constant pipe_stages : integer := 3;
66+
constant LOG2_RAM_DEPTH : integer := CLOGB2(RAM_DEPTH);
67+
68+
type mem_count_arr is array(MAX_INPUTS-1 downto 0) of integer;
69+
type toread_arr is array(pipe_stages-1 downto 0) of integer range 0 to 3;
70+
type bx_arr is array(pipe_stages downto 0) of std_logic_vector(2 downto 0);
71+
type addr_arr_arr is array(MAX_INPUTS-1 downto 0) of std_logic_vector(LOG2_RAM_DEPTH-1 downto 0);
72+
73+
--nent and din are repackaged from odd input type into
74+
--arrays
75+
type nent_array is array(MAX_INPUTS-1 downto 0) of t_arr_7b(0 to NUM_PAGES-1);
76+
type din_array is array(MAX_INPUTS-1 downto 0) of std_logic_vector(RAM_WIDTH-1 downto 0);
77+
78+
signal valid : std_logic_vector(pipe_stages-1 downto 0) := (others => '0');
79+
signal bx_pipe : bx_arr := (others => (others => '0'));
80+
signal addr_arr_int : addr_arr_arr := (others => (others => '0'));
81+
signal bx_last : std_logic_vector(2 downto 0) := "111";
82+
signal bx_in_latch : std_logic_vector(2 downto 0) := "111"; --since output triggered by BX change, initializing bx_in_latch to 7 will start write on first valid bx (0)
83+
signal mem_count : mem_count_arr := (others => 0);
84+
signal toread : toread_arr := (others => 0);
85+
signal current_page: natural := 7 mod NUM_PAGES;
86+
signal readmask : std_logic_vector(MAX_INPUTS-1 downto 0) := (others => '0');
87+
88+
begin
89+
process(clk)
90+
variable nent_arr: nent_array;
91+
variable din_arr: din_array;
92+
variable bx_change : boolean := false; -- indicates to the module whether or not the bx has changed compared to the previous clock
93+
variable nextread : integer range 0 to 3 := 0;
94+
variable mem_count_next : mem_count_arr := (others => 0);
95+
96+
begin
97+
if rising_edge(clk) then
98+
if (bx_in_vld = '1') then
99+
bx_in_latch <= bx_in;
100+
current_page <= to_integer(unsigned(bx_in)) mod NUM_PAGES;
101+
end if;
102+
103+
nent_arr := (nent3,nent2,nent1,nent0); --repackage nent and din as arrays
104+
din_arr := (din3, din2, din1, din0);
105+
bx_change := (bx_last /= bx_in_latch);
106+
107+
if (bx_change) then --restart the read counters on a change in bx (the rst port is not read by this process)
108+
mem_count <= (others => 0);
109+
toread(0) <= (NUM_INPUTS-1) mod NUM_INPUTS;
110+
valid(0) <= '0';
111+
112+
--check if memory read counter is less than nentries
113+
--this sets readmask to 1 for any inputs that still have words to read
114+
for i in 0 to NUM_INPUTS-1 loop
115+
if (0 < to_integer(unsigned(nent_arr(i)(current_page)))) then
116+
readmask(i) <= '1';
117+
else
118+
readmask(i) <= '0';
119+
end if;
120+
end loop;
121+
122+
else
123+
--only check for valid reads on non BX change clocks
124+
--this gives up a clock cycle, but reduces logic levels downstream
125+
126+
for i in 0 to NUM_INPUTS-1 loop
127+
mem_count_next(i) := mem_count(i);
128+
end loop;
129+
130+
if (to_integer(unsigned(readmask)) = 0) then
131+
valid(0) <= '0';
132+
else
133+
valid(0) <= '1';
134+
--round-robin pick: the last matching i wins, so priority starts at input (toread(0)+1) mod 4 and the current toread(0) is considered last
135+
for i in 0 to 3 loop
136+
if (readmask((toread(0) - i) mod 4) = '1') then
137+
nextread := (toread(0) - i) mod 4;
138+
end if;
139+
end loop;
140+
addr_arr_int(nextread) <= std_logic_vector(to_unsigned(current_page*page_length + mem_count(nextread), LOG2_RAM_DEPTH));
141+
mem_count(nextread) <= mem_count(nextread) + 1;
142+
toread(0) <= nextread;
143+
mem_count_next(nextread) := mem_count_next(nextread)+1;
144+
end if;
145+
146+
--check if memory read counter is less than nentries
147+
--this sets readmask to 1 for any inputs that still have words to read
148+
for i in 0 to NUM_INPUTS-1 loop
149+
if ((mem_count_next(i)) < to_integer(unsigned(nent_arr(i)(current_page)))) then
150+
readmask(i) <= '1';
151+
else
152+
readmask(i) <= '0';
153+
end if;
154+
end loop;
155+
156+
end if ;
157+
158+
--generate output a few clocks after address is set to account for delay in RAMs
159+
if valid(pipe_stages-1) ='1' then
160+
if (NUM_EXTRA_BITS > 0) then
161+
merged_dout <= '1' & std_logic_vector(to_unsigned(toread(pipe_stages-1),NUM_EXTRA_BITS)) & din_arr(toread(pipe_stages-1));
162+
else
163+
merged_dout <= '1' & din_arr(toread(pipe_stages-1));
164+
end if ;
165+
else
166+
merged_dout <= (others => '0');
167+
end if;
168+
169+
bx_last <= bx_in_latch;
170+
bx_pipe(0) <= bx_in_latch;
171+
bx_out <= bx_pipe(pipe_stages);
172+
for j in pipe_stages-2 downto 0 loop
173+
valid(j+1) <= valid(j);
174+
toread(j+1) <= toread(j);
175+
end loop;
176+
for j in pipe_stages-1 downto 0 loop
177+
bx_pipe(j+1) <= bx_pipe(j);
178+
end loop;
179+
end if;
180+
end process;
181+
182+
GEN_ADDR: for i in 0 to NUM_INPUTS-1 generate
183+
begin
184+
addr_arr(LOG2_RAM_DEPTH*(i+1)-1 downto LOG2_RAM_DEPTH*i) <= addr_arr_int(i);
185+
end generate;
186+
187+
end RTL;

0 commit comments

Comments
 (0)