Skip to content

Commit 06e92ee

Browse files
committed
Merge branch 'spinler-feat-cocotb_test_throughput' into 'devel'
feat: add simulation-based throughput test for DMA See merge request ndk/ndk-fpga!298
2 parents 9e88227 + e03453e commit 06e92ee

File tree

23 files changed

+547
-106
lines changed

23 files changed

+547
-106
lines changed

apps/minimal/tests/cocotb/cocotb_grpc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import scapy.volatile
1414
import scapy.contrib.mpls
1515

16-
from ndk_core import NFBDevice
16+
from cocotbext.ndk_core import NFBDevice
1717

1818
import cocotbext.ofm.utils.sim.modelsim as ms
1919
import cocotb.utils

apps/minimal/tests/cocotb/cocotb_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import cocotb
99
from cocotb.triggers import Timer
1010

11-
from ndk_core import NFBDevice
11+
from cocotbext.ndk_core import NFBDevice
1212

1313
import cocotbext.ofm.utils.sim.modelsim as ms
1414
import cocotb.utils
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Plot simulated DMA throughput from "<DIR>THR:" records found in a log file.
#
# Usage: gnuplot -c <this script> [DIR [IFN [CARD [SPEED]]]]
#   DIR    direction tag, also the record prefix to look for   (default "RX")
#   IFN    input log file that contains the records            (default "transcript")
#   CARD   card name used in the legend                        (default "unknown card")
#   SPEED  port speed label used in the legend                 (default "100G")
#
# Each record is expected to look like "# <DIR>THR: <pktlen> <col2> <col3>".
# The result is written to "report-sim-<DIR>-throughput.png".
DIR = ARG1
IFN = ARG2
CARD = ARG3
SPEED = ARG4

# Fall back to defaults for arguments that were not supplied.
if (DIR eq "") DIR = "RX"
if (IFN eq "") IFN = "transcript"
if (CARD eq "") CARD = "unknown card"
if (SPEED eq "") SPEED = "100G"


# Packet-size range shown on the x axis [B].
MIN=64
MAX=256+64

set xrange [MIN:MAX]
set xtics 32
set mxtics 4
set ytics 10

# Thin dotted black line used for the grid.
set style line 42 lc black lw 0.5 dt ". "

set grid xtics ytics mxtics, ls 42

# Output scale factor; 1 gives a 1920x1080 image.
scl=1
set terminal pngcairo size 1920*scl,1080*scl noenhanced font 'Verdana,14' linewidth 2

set xlabel "Packet size [B] (without CRC, without NDP header)"
set ylabel "DMA data throughput [Gbps]"
set key right bottom


set title DIR . " simulation throughput"
set output "report-sim-" . DIR ."-throughput.png"

# Shell pipeline that extracts the "<DIR>THR:" records from a log file,
# strips the "# <DIR>THR: " prefix and turns the spaces into ", ".
cmd_getfn(ifn) = "< cat " . ifn . " | grep " . DIR . "THR | sed 's/# " . DIR . "THR: //' |sed 's/ /, /g'"


# Column 1 is the packet size; columns 2 and 3 are scaled by 0.001 and by a
# per-packet overhead factor (payload + overhead bytes) / payload.
plot \
    cmd_getfn(IFN) using ($1):($3)*0.001 * ((($1)+20) / ($1)) with lines title SPEED . " Eth RAW" , \
    cmd_getfn(IFN) using ($1):($3)*0.001 * ((($1)+24) / ($1)) with lines title SPEED . " Eth with CRC" , \
    cmd_getfn(IFN) using ($1):($3)*0.001 * ((($1)+20+8) / ($1)) with lines title SPEED . " Eth with 8B NDP header" , \
    cmd_getfn(IFN) using ($1):($2)*0.001 * (($1+20) / $1) with lines title CARD . " PCIe"
43+
Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
# SPDX-License-Identifier: BSD-3-Clause
2+
# Copyright (C) 2025 CESNET z. s. p. o.
3+
# Author(s): Martin Spinler <[email protected]>
4+
5+
# INFO
6+
######
7+
# This testbench measures maximal throughput of RX+TX DMA
8+
# and produces graphs in app/build/card directory
9+
10+
import os
11+
import sys
12+
import logging
13+
import cocotb
14+
import itertools
15+
from cocotb.triggers import Timer, Event, First, RisingEdge
16+
17+
from cocotbext.ndk_core import NFBDevice
18+
19+
import cocotbext.ofm.utils.sim.modelsim as ms
20+
import cocotb.utils
21+
22+
from cocotbext.ofm.utils.sim.bus import MfbBus, MiBus, DmaUpMvbBus, DmaDownMvbBus
23+
24+
25+
from ofm.comp.mfb_tools.debug.gen_loop_switch import GenLoopSwitch
26+
27+
# Configuration parameters
28+
##########################
29+
30+
# used channels per DMA_ENDPOINT
# (get_channel_list() additionally caps each value at the design's
# <direction>_CHANNELS value, so 16 is an upper bound, not a requirement)
31+
USED_CHANNELS = {
32+
"RX": 16,
33+
"TX": 16,
34+
}
35+
# Max expected throughput (for graph generation)
# Formatted as "<value>G" for the graph legend and used by both tests to
# compute the reference rate they print; presumably in Gbps - TODO confirm.
36+
PORT_THROUGHPUT = 100
# Packet lengths swept by both tests: every length in 64..299 that is a
# multiple of 8 or one byte past a multiple of 8 (64, 65, 72, 73, ..., 296, 297).
37+
PKTLEN = [x for x in range(64, 300) if x % 8 in [0, 1]]
38+
39+
# Log to stderr; force=True replaces any handlers already installed on the
# root logger. The root level is then raised to INFO.
40+
logging.basicConfig(stream=sys.stderr, force=True)
41+
logging.getLogger().setLevel(logging.INFO)
42+
43+
logger = logging.getLogger(__name__)
44+
# Named logger kept at hand so its level can be switched to DEBUG below.
45+
logger_mi = logging.getLogger("cocotb.nfb.ext.python_servicer")
46+
#logger_mi.setLevel(logging.DEBUG)
47+
48+
# Shortcuts
# e wraps a callable with cocotb.external; st returns the simulation time.
49+
e = cocotb.external
50+
st = cocotb.utils.get_sim_time
51+
add_cursor = ms.add_cursor
# NOTE: this shadows the builtin print for the rest of the module. The tests
# print their "*THR:" records through it and run_gnuplot() later reads the
# "transcript" file - presumably ms.print writes there; TODO confirm.
52+
print = ms.print
53+
54+
55+
def run_gnuplot(d, dev):
    """Render the throughput graph for one direction with gnuplot.

    Runs the ``.gpi`` script that sits next to this module (same path with the
    file extension replaced by ``.gpi``) and hands it four arguments: the
    direction tag, the path of the ``transcript`` file in the current working
    directory, the card name and the port speed label ``"<PORT_THROUGHPUT>G"``.

    Graph generation is best effort: a non-zero gnuplot exit status is ignored
    and a gnuplot binary that cannot be started is only logged.

    Args:
        d: Direction tag passed to the script as its first argument.
        dev: Device object; only its ``_card_name`` attribute is read.
    """
    import subprocess

    script_path = os.path.splitext(os.path.realpath(__file__))[0] + ".gpi"
    argv = [
        "gnuplot",
        "-c",
        script_path,
        str(d),
        os.path.join(os.getcwd(), "transcript"),
        str(dev._card_name),
        f"{PORT_THROUGHPUT}G",
    ]
    try:
        # An argument vector bypasses the shell, so quotes, spaces or "$" in
        # the paths or in the card name cannot break or alter the command.
        subprocess.run(argv, check=False)
    except OSError as exc:
        logging.getLogger(__name__).warning("gnuplot could not be started: %s", exc)
58+
59+
60+
async def get_dev(dut, init=True, **kwargs):
    """Create an ``NFBDevice`` for ``dut`` and optionally initialise it.

    Args:
        dut: Top-level design handle passed to ``NFBDevice``.
        init: When true, ``dev.init()`` is awaited before returning.
        **kwargs: Forwarded unchanged to the ``NFBDevice`` constructor.

    Returns:
        The pair ``(dev, dev.nfb)``.
    """
    device = NFBDevice(dut, **kwargs)
    if not init:
        return device, device.nfb
    await device.init()
    return device, device.nfb
65+
66+
67+
async def sendmsgs(txq, msgs):
    """Send ``msgs`` through ``txq.sendmsg``, wrapped with ``cocotb.external``."""
    external_send = e(txq.sendmsg)
    await external_send(msgs)
69+
70+
71+
async def rx_push_descs(rxq, n, ev):
    """Push up to ``n`` descriptors into ``rxq``, stopping once ``ev`` fires.

    Each descriptor is pushed in its own task via ``rxq._push_desc``; the
    ``flush`` flag is set on the last descriptor of every group of 64.
    After starting a push, the coroutine waits for whichever completes first:
    the push itself or the stop event. If the event wins, it returns at once.

    Args:
        rxq: Queue object providing the ``_push_desc(flush=...)`` coroutine.
        n: Maximum number of descriptors to push.
        ev: Event whose ``wait()`` trigger aborts the loop.
    """
    flush_period = 64
    for index in range(n):
        is_group_end = (index % flush_period) == (flush_period - 1)
        push_task = cocotb.start_soon(rxq._push_desc(flush=is_group_end))
        stop_trigger = ev.wait()
        fired = await First(push_task, stop_trigger)
        if fired == stop_trigger:
            return
79+
80+
81+
async def recvmsgs(rxq, event):
    """Keep receiving from ``rxq`` and discarding the result until ``event`` is set.

    ``rxq.recvmsg`` is called through ``cocotb.external``. A truthy result is
    dropped and the next receive starts immediately; a falsy result is followed
    by a 5 ns pause before trying again.
    """
    while not event.is_set():
        received = await e(rxq.recvmsg)()
        if received:
            continue
        await Timer(5, units='ns')
88+
89+
90+
async def rx_stop_ch(rxq):
    """Call ``rxq.stop`` through ``cocotb.external`` and wait for it to finish."""
    external_stop = e(rxq.stop)
    await external_stop()
92+
93+
94+
async def wait_for_val(signal, val, clk, n, to=10000):
    """Wait until ``signal`` has matched ``val`` on ``n`` consecutive samples.

    The signal is sampled once per clock cycle, just before waiting for the
    next rising edge of ``clk``. A sample that differs from ``val`` resets the
    run length to zero.

    Args:
        signal: Handle whose ``value`` is compared with ``val``.
        val: Value the signal has to hold.
        clk: Clock handle used for ``RisingEdge``.
        n: Required number of consecutive matching samples; ``0`` returns
            immediately without sampling.
        to: Maximum number of clock cycles to wait.

    Raises:
        TimeoutError: If the run of ``n`` matching samples is not completed
            within ``to`` clock cycles.
    """
    edge = RisingEdge(clk)
    streak = 0
    remaining = to
    while streak != n:
        streak = streak + 1 if signal.value == val else 0
        await edge
        if streak == n:
            # A run completed on the very last permitted cycle is a success,
            # not a timeout.
            break
        remaining -= 1
        if remaining <= 0:
            raise TimeoutError(
                f"signal did not hold {val!r} for {n} consecutive cycles "
                f"within {to} clock cycles"
            )
103+
104+
105+
def get_msg(pktlen, n, ch):
    """Build one message tuple carrying a ``pktlen``-byte test payload.

    The payload repeats the 4-byte pattern ``[0, ch, high byte of n, low byte
    of n]`` and is cut to exactly ``pktlen`` bytes, so only the low 16 bits of
    ``n`` end up in the data.

    Args:
        pktlen: Payload length in bytes.
        n: Packet sequence number.
        ch: Channel number; it is stored as a single byte.

    Returns:
        The 3-tuple ``(payload, b"", 0)``.
    """
    pattern = [0, ch, (n >> 8) & 0xFF, n & 0xFF]
    repeats = (pktlen + 3) // 4  # enough whole patterns to cover pktlen bytes
    payload = bytes(pattern * repeats)[:pktlen]
    return payload, bytes(), 0
107+
108+
109+
def get_channel_list(tdir):
    """Select the channels to exercise for one direction.

    Reads ``<tdir>_CHANNELS`` from ``core.dma_i`` and limits the number of
    channels per group to ``USED_CHANNELS[tdir]``. The channel index space
    ``0 .. DMA_STREAMS * <tdir>_CHANNELS - 1`` is treated as consecutive groups
    of ``<tdir>_CHANNELS`` channels, and the first channels of every group are
    selected.

    Args:
        tdir: Direction key, used both for ``USED_CHANNELS`` and for the
            ``<tdir>_CHANNELS`` attribute name.

    Returns:
        The pair ``(channels_per_group, selected_channel_indices)``.
    """
    ep_channels = getattr(core.dma_i, f"{tdir}_CHANNELS").value
    per_group = min(USED_CHANNELS[tdir], ep_channels)
    total = DMA_STREAMS * ep_channels
    selected = [ch for ch in range(total) if ch % ep_channels < per_group]
    return per_group, selected
118+
119+
120+
# TX throughput test: for every packet length in PKTLEN it sends a burst of
# messages on each selected TX queue, lets the traffic settle, measures over a
# fixed window, prints one "TXTHR:" record and finally calls run_gnuplot().
# NOTE(review): indentation is not visible in this view, so the nesting of the
# statements below is inferred from their order - confirm against the file.
@cocotb.test(timeout_time=80000, timeout_unit='us', skip=False)
121+
async def test_ndp_sendmsg_burst(dut):
122+
tdir = "TX"
123+
clk = core.dma_i.USR_CLK
124+
re = RisingEdge(clk)
125+
bm = busm[tdir]
# Start the monitor coroutine of every TX bus object, clocked by USR_CLK.
126+
for m in bm:
127+
cocotb.start_soon(m.monitor(clk))
128+
129+
dev, nfb = await get_dev(dut)
130+
131+
# One GenLoopSwitch object per DMA stream.
gls = []
132+
for i in range(DMA_STREAMS):
133+
gls.append(GenLoopSwitch(nfb, index=i))
134+
135+
# Set attribute 'input' of each switch's r2l object to 2 (done through
# cocotb.external); the meaning of value 2 is not visible here - TODO confirm.
for g in gls:
136+
await e(setattr)(g.r2l, 'input', 2)
137+
138+
stream_chnls, chnls = get_channel_list(tdir)
139+
tasks = []
140+
for pktlen in PKTLEN:
141+
for ch in chnls:
142+
await e(nfb.ndp.tx[ch].start)()
143+
144+
# Queue the burst for every channel in its own task. With the current PKTLEN
# range 262144 // pktlen always exceeds 256, so npkts is 256.
for ch in chnls:
145+
npkts = min(256, 262144 // pktlen)
146+
msgs = [get_msg(pktlen, n, ch) for n in range(npkts)]
147+
t = cocotb.start_soon(sendmsgs(nfb.ndp.tx[ch], msgs))
148+
tasks.append((ch, t))
149+
150+
# Wait until SRC_RDY is seen at 1 on every bus, then let 1000 cycles pass
# before the measurement starts.
for m in bm:
151+
await wait_for_val(m._get_handle('SRC_RDY'), 1, core.dma_i.USR_CLK, 1, 10000)
152+
for i in range(1000):
153+
await re
154+
155+
#add_cursor(f"Measure start {pktlen}")
# Measurement window: clear every monitor, then run for 2000 clock cycles.
# (clear() presumably resets what _thr is computed from - TODO confirm.)
156+
for m in bm:
157+
m.clear()
158+
for i in range(2000):
159+
await re
160+
#add_cursor(f"Measure stop {pktlen}")
161+
162+
# Reference rate for a PORT_THROUGHPUT link when every packet costs
# pktlen + 24 bytes, and the payload bits carried at that rate.
# NOTE(review): the RX test uses + 20 here - confirm the difference is intended.
mpps = PORT_THROUGHPUT * 1e3 / (8 * (pktlen + 24))
163+
eth_raw = (pktlen * mpps * 8)
# Record format: packet length, summed monitor _thr * 1000, reference value.
164+
print("TXTHR:", pktlen, sum(m._thr * 1000 for m in bm), eth_raw)
165+
166+
# Let all send tasks finish before stopping their queues.
for ch, t in tasks:
167+
await t
168+
for ch, t in tasks:
169+
await e(nfb.ndp.tx[ch].stop)()
170+
171+
# Wait for SRC_RDY to stay at 0 for 150 consecutive cycles on every bus.
for m in bm:
172+
await wait_for_val(m._get_handle('SRC_RDY'), 0, core.dma_i.USR_CLK, 150, 10000)
173+
tasks.clear()
174+
175+
run_gnuplot(tdir, dev)
176+
177+
178+
# RX throughput test: for every packet length in PKTLEN it starts the packet
# generator of each GenLoopSwitch, lets the traffic settle, measures over a
# fixed window, prints one "RXTHR:" record and finally calls run_gnuplot().
# NOTE(review): indentation is not visible in this view. In particular it
# cannot be confirmed here whether the 300-cycle wait belongs to the
# "if not channels_running" branch, or where the tasks_stop teardown is nested.
@cocotb.test(timeout_time=80000, timeout_unit='us', skip=False)
179+
async def test_ndp_recvmsg_burst(dut):
180+
tdir = "RX"
181+
clk = core.dma_i.USR_CLK
182+
re = RisingEdge(clk)
183+
bm = busm[tdir]
# Start the monitor coroutine of every RX bus object, clocked by USR_CLK.
184+
for m in bm:
185+
cocotb.start_soon(m.monitor(clk))
186+
187+
dev, nfb = await get_dev(dut)
188+
189+
# One GenLoopSwitch object per DMA stream.
gls = []
190+
for i in range(DMA_STREAMS):
191+
gls.append(GenLoopSwitch(nfb, index=i))
192+
193+
# stop_channels is never reassigned in this function, so the
# "if stop_channels" branch further down does not execute as written.
stop_channels = False
194+
channels_running = False
195+
196+
stream_chnls, chnls = get_channel_list(tdir)
197+
tasks = []
198+
for pktlen in PKTLEN:
199+
if not channels_running:
200+
channels_running = True
# Per channel: start the queue, then launch a descriptor-pushing task (up to
# 2**24 descriptors) and a receiving task, each with its own stop Event.
201+
for ch in chnls:
202+
await e(nfb.ndp.rx[ch].start)()
203+
evd = Event()
204+
td = cocotb.start_soon(rx_push_descs(dev.dma.rx[ch], 2**24, evd))
205+
await re
206+
evr = Event()
207+
tr = cocotb.start_soon(recvmsgs(nfb.ndp.rx[ch], evr))
208+
tasks.append((ch, td, tr, evd, evr))
209+
210+
for i in range(300):
211+
await re
# Start the generator of every switch for this packet length; the meaning of
# the remaining gen_start arguments is not visible here - TODO confirm.
212+
for g in gls:
213+
await e(g.l2r.gen_start)(True, pktlen, 0, stream_chnls)
214+
215+
# Wait for SRC_RDY to be 1 on 10 consecutive samples on every bus.
for m in bm:
216+
await wait_for_val(m._get_handle('SRC_RDY'), 1, core.dma_i.USR_CLK, 10, 10000)
217+
218+
for i in range(1000):
219+
await re
220+
#add_cursor(f"Measure start {pktlen}")
# Measurement window: clear every monitor, then run for 3000 clock cycles.
# (clear() presumably resets what _thr is computed from - TODO confirm.)
221+
for m in bm:
222+
m.clear()
223+
for i in range(3000):
224+
await re
225+
#add_cursor(f"Measure stop {pktlen}")
226+
227+
# Reference rate for a PORT_THROUGHPUT link when every packet costs
# pktlen + 20 bytes, and the payload bits carried at that rate.
# NOTE(review): the TX test uses + 24 here - confirm the difference is intended.
mpps = PORT_THROUGHPUT * 1e3 / (8 * (pktlen + 20))
228+
eth_raw = (pktlen * mpps * 8)
# Record format: packet length, summed monitor _thr * 1000, reference value.
229+
print("RXTHR:", pktlen, sum(m._thr * 1000 for m in bm), eth_raw)
230+
231+
for g in gls:
232+
await e(g.l2r.gen_stop)()
233+
234+
# Drain: after 500 cycles, wait until SRC_RDY has stayed at 0 for 10000
# consecutive cycles on every bus (limit 200000 cycles).
for i in range(500):
235+
await re
236+
for m in bm:
237+
await wait_for_val(m._get_handle('SRC_RDY'), 0, core.dma_i.USR_CLK, 10000, 200000)
238+
239+
for i in range(100):
240+
await re
241+
242+
if stop_channels:
243+
channels_running = False
244+
245+
# Teardown: signal both helper tasks of each channel to stop, wait for them,
# then stop the queues concurrently and wait for all stops to finish.
tasks_stop = []
246+
for ch, td, tr, evd, evr in tasks:
247+
evd.set()
248+
await td
249+
evr.set()
250+
await tr
251+
t = cocotb.start_soon(rx_stop_ch(nfb.ndp.rx[ch]))
252+
tasks_stop.append(t)
253+
for t in tasks_stop:
254+
await t
255+
256+
run_gnuplot(tdir, dev)
257+
258+
259+
# Module-level setup, executed at import time: resolves design handles used by
# the tests above and registers buses in the waveform view via add_wave().
core = NFBDevice.core_instance_from_top(cocotb.top)
260+
#ms.cmd(f"log -recursive {ms.cocotb2path(core)}/*")
261+
262+
# Number of DMA streams, read from the design; used by the tests and by
# get_channel_list().
DMA_STREAMS = core.dma_i.DMA_STREAMS.value
263+
264+
pcic = core.pcie_i.pcie_core_i
# NOTE(review): adapter is not referenced anywhere else in the visible code.
265+
adapter = pcic.pcie_adapter_g[0].pcie_adapter_i
266+
267+
# Per direction, one MfbBus object per DMA stream, attached to that stream's
# gen_loop_switch_i instance. The tests use these as throughput monitors.
busm = {"RX": [], "TX": []}
268+
for d in busm:
269+
for i in range(DMA_STREAMS):
270+
t = MfbBus(core.dma_i.gls_g[i].gls_en_g.gen_loop_switch_i, f'DMA_{d}_MFB', label=f"DMA_{d}_MFB{i}")
271+
t.add_wave()
272+
busm[d].append(t)
273+
274+
# Add the PCIe RQ and RC MVB/MFB buses of every DMA endpoint to the waveform.
for i in range(core.dma_i.DMA_ENDPOINTS.value):
275+
DmaUpMvbBus(core.dma_i, 'PCIE_RQ_MVB', i, label=f"RQ_MVB_{i}").add_wave()
276+
MfbBus(core.dma_i, 'PCIE_RQ_MFB', i, label=f"RQ_MFB_{i}").add_wave()
277+
DmaDownMvbBus(core.dma_i, 'PCIE_RC_MVB', i, label=f"RC_MVB_{i}").add_wave()
278+
MfbBus(core.dma_i, 'PCIE_RC_MFB', i, label=f"RC_MFB_{i}").add_wave()
279+
280+
# NOTE(review): if the disabled loop below is re-enabled, '{m}_MFB' needs an
# f-string prefix; as written it is a literal string.
#for m in ["RC", "RQ", "CC", "CQ"]:
281+
# MfbBus(pcic, '{m}_MFB', 0).add_wave()
282+
283+
# Add the MI bus of every PCIe endpoint to the waveform.
for i in range(core.pcie_i.PCIE_ENDPOINTS.value):
284+
MiBus(core.pcie_i, 'MI', i, label=f'MI_PCIe{i}').add_wave()

apps/minimal/tests/cocotb/issues/issue1.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from cocotbext.ofm.utils.sim.bus import MfbBus, DmaDownMvbBus
1212
from cocotbext.ofm.utils.scapy import simple_tcp_bytes
1313

14-
from ndk_core import NFBDevice
14+
from cocotbext.ndk_core import NFBDevice
1515

1616
print = ms.print
1717

apps/minimal/tests/cocotb/issues/issue2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from cocotbext.ofm.utils.sim.bus import MiBus
1111

12-
from ndk_core import NFBDevice
12+
from cocotbext.ndk_core import NFBDevice
1313

1414
print = ms.print
1515

comp/debug/mem_tester/amm_gen/amm_gen.vhd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ architecture FULL of AMM_GEN is
207207
signal amm_write_delayed : std_logic_vector(AMM_WRITE_DELAY downto 0);
208208
signal amm_read_delayed : std_logic_vector(AMM_READ_DELAY downto 0);
209209

210-
signal target_burst_cnt : std_logic_vector(BURST_BITS - 1 downto 0);
210+
signal target_burst_cnt : std_logic_vector(BURST_BITS - 1 downto 0) := std_logic_vector(to_unsigned(INIT_BURST_CNT, BURST_BITS));
211211
signal target_burst_cnt_lim : std_logic_vector(BURST_BITS - 1 downto 0); -- Indexed from 0 to match curr_burst
212212
signal curr_burst_cnt : std_logic_vector(BURST_BITS - 1 downto 0);
213213
-- To restore burst cnt when amm_ready occurs

0 commit comments

Comments
 (0)