# Patch summary (leading text before the first "diff --git" header is skipped by patch tools).
#
# deepsocflow/py/hardware.py
#   simulate(): the Verilator command line gains "--trace --trace-depth 0".
# deepsocflow/test/sv/axi_sys_tb.sv
#   The commented-out "#200us; $finish;" after $dumpvars() is replaced by an
#   active "#2000us; $finish;" in the same initial block.
# run/resnet50.py
#   call(): the sk16 result is no longer also bound to x_skip16, and the
#   exit() that preceded "return x" is removed.
# run/stuck.py (new file, 135 lines)
#   Defines UserModel (five XBundle layers b0..b4, with b0's output fed to b3
#   as its second argument), builds/compiles/saves/reloads the model, and adds
#   a pytest-parametrized test_dnn_engine that exports and verifies inference.
#
# NOTE(review): line breaks and leading indentation in this patch were
#   reconstructed; hunk line counts match the @@ headers, but context-line
#   indentation must be confirmed against the tree before applying.
# NOTE(review): the active "$finish" ends the testbench at 2000us regardless of
#   progress — confirm this is intended outside of debugging.
# NOTE(review): tracing is now enabled for every Verilator run — confirm the
#   simulation-time / VCD-size cost is acceptable.
# NOTE(review): stuck.py saves to "resnet50.h5"; presumably run/resnet50.py uses
#   the same filename — verify the two do not clobber each other.
# NOTE(review): in stuck.py the names mnist, to_categorical and np are imported
#   but not referenced anywhere in the file.
diff --git a/deepsocflow/py/hardware.py b/deepsocflow/py/hardware.py
index 5b56553..8ccfebb 100644
--- a/deepsocflow/py/hardware.py
+++ b/deepsocflow/py/hardware.py
@@ -223,7 +223,7 @@ def simulate(self, SIM='verilator', SIM_PATH=''):
             assert subprocess.run(cmd).returncode == 0
 
         if SIM == "verilator":
-            cmd = f'{SIM_PATH}verilator --binary -j 0 -O3 --relative-includes --top {self.TB_MODULE} -I../ -F ../sources.txt -CFLAGS -DSIM -CFLAGS -I../ {self.MODULE_DIR}/c/sim.c -CFLAGS -g --Mdir ./'
+            cmd = f'{SIM_PATH}verilator --binary -j 0 -O3 --relative-includes --trace --trace-depth 0 --top {self.TB_MODULE} -I../ -F ../sources.txt -CFLAGS -DSIM -CFLAGS -I../ {self.MODULE_DIR}/c/sim.c -CFLAGS -g --Mdir ./'
             print(cmd)
             assert subprocess.run(cmd.split(' '), cwd='build').returncode == 0
         print("\n\nSIMULATING...\n\n")
diff --git a/deepsocflow/test/sv/axi_sys_tb.sv b/deepsocflow/test/sv/axi_sys_tb.sv
index e13eac8..f08d5ac 100644
--- a/deepsocflow/test/sv/axi_sys_tb.sv
+++ b/deepsocflow/test/sv/axi_sys_tb.sv
@@ -91,8 +91,8 @@ module axi_sys_tb;
   initial begin
     $dumpfile("axi_tb_sys.vcd");
     $dumpvars();
-    // #200us;
-    // $finish;
+    #2000us;
+    $finish;
   end
 
   chandle mpv, cp;
diff --git a/run/resnet50.py b/run/resnet50.py
index c8f0b1a..773e5d5 100644
--- a/run/resnet50.py
+++ b/run/resnet50.py
@@ -447,7 +447,7 @@ def call (self, x):
         x = x_skip15 = self.sk15(x, x_skip14) # 39
         x = self.b26(x) # 40
         x = self.b27(x) # 41
-        x = x_skip16 = self.sk16(x, x_skip15) # 42
+        x = self.sk16(x, x_skip15) # 42
         x_skip17 = self.sk17(x) # 43
         x = self.b28(x) # 44
         x = self.b29(x) # 45
@@ -459,7 +459,6 @@ def call (self, x):
         x = self.b33(x) # 51
         x = x_skip20 = self.sk20(x, x_skip19) # 52
         x = self.b34(x) # 53
-        exit()
         return x
 
 x = x_in = Input(input_shape, name="input")
diff --git a/run/stuck.py b/run/stuck.py
new file mode 100644
index 0000000..a7e8b56
--- /dev/null
+++ b/run/stuck.py
@@ -0,0 +1,135 @@
+import os
+import pytest
+import itertools
+import sys
+sys.path.append("../../")
+from tensorflow import keras
+from keras.layers import Input
+from keras.models import Model, save_model
+from keras.datasets import mnist
+from keras.optimizers import Adam
+from keras.utils import to_categorical
+from qkeras.utils import load_qmodel
+import numpy as np
+import pprint
+# import tensorflow as tf
+#tf.keras.utils.set_random_seed(0)
+
+from deepsocflow import *
+
+(SIM, SIM_PATH) = ('xsim', "F:/Xilinx/Vivado/2022.2/bin/") if os.name=='nt' else ('verilator', '')
+
+
+input_shape = (14,14,256)
+sys_bits = SYS_BITS(x=4, k=4, b=16)
+
+@keras.saving.register_keras_serializable()
+class UserModel(XModel):
+    def __init__(self, sys_bits, x_int_bits, *args, **kwargs):
+        super().__init__(sys_bits, x_int_bits, *args, **kwargs)
+
+        self.b0 = XBundle(
+            core=XConvBN(
+                k_int_bits=0, b_int_bits=0, filters=1024, kernel_size=1,strides=1,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type=None),),
+            # add_act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)
+        )
+
+        self.b1 = XBundle(
+            core=XConvBN(
+                k_int_bits=0, b_int_bits=0, filters=256, kernel_size=1,strides=1,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0),),
+        )
+
+        self.b2 = XBundle(
+            core=XConvBN(
+                k_int_bits=0, b_int_bits=0, filters=256, kernel_size=3,strides=1,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0),),
+        )
+
+        self.b3 = XBundle(
+            core=XConvBN(
+                k_int_bits=0, b_int_bits=0, filters=1024, kernel_size=1,strides=1,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type=None),),
+            add_act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)
+        )
+
+        self.b4 = XBundle(
+            core=XConvBN(
+                k_int_bits=0, b_int_bits=0, filters=2048, kernel_size=1,strides=2,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type=None),),
+            # add_act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)
+        )
+
+
+    def call (self, x):
+        x = self.input_quant_layer(x)
+
+        x = x_skip15 = self.b0(x) # 39
+        x = self.b1(x) # 40
+        x = self.b2(x) # 41
+        x = self.b3(x, x_skip15) # 42
+        x = self.b4(x) # 43
+
+        return x
+
+x = x_in = Input(input_shape, name="input")
+user_model = UserModel(sys_bits=sys_bits, x_int_bits=0)
+x = user_model(x_in)
+
+model = Model(inputs=[x_in], outputs=[x])
+model.compile(loss="categorical_crossentropy", optimizer=Adam(learning_rate=0.0001), metrics=["accuracy"])
+
+'''
+Save & Reload
+'''
+save_model(model, "resnet50.h5")
+loaded_model = load_qmodel("resnet50.h5")
+
+def product_dict(**kwargs):
+    for instance in itertools.product(*(kwargs.values())):
+        yield dict(zip(kwargs.keys(), instance))
+
+@pytest.mark.parametrize("PARAMS", list(product_dict(
+    processing_elements = [(7,96) ],
+    frequency_mhz = [ 250 ],
+    bits_input = [ 4 ],
+    bits_weights = [ 4 ],
+    bits_sum = [ 20 ],
+    bits_bias = [ 16 ],
+    max_batch_size = [ 64 ],
+    max_channels_in = [ 512 ],
+    max_kernel_size = [ 9 ],
+    max_image_size = [ 512 ],
+    max_n_bundles = [ 64 ],
+    ram_weights_depth = [ 512 ],
+    ram_edges_depth = [ 3584 ],
+    axi_width = [ 128 ],
+    config_baseaddr = ["B0000000"],
+    target_cpu_int_bits = [ 32 ],
+    valid_prob = [ 1 ],
+    ready_prob = [ 1 ],
+    data_dir = ['vectors'],
+    )))
+def test_dnn_engine(PARAMS):
+
+    '''
+    SPECIFY HARDWARE
+    '''
+    hw = Hardware (**PARAMS)
+    hw.export_json()
+    hw = Hardware.from_json('hardware.json')
+    hw.export() # Generates: config_hw.svh, config_hw.tcl
+    hw.export_vivado_tcl(board='zcu104')
+
+
+    '''
+    VERIFY & EXPORT
+    '''
+    export_inference(loaded_model, hw, batch_size=1)
+    verify_inference(loaded_model, hw, SIM=SIM, SIM_PATH=SIM_PATH)
+
+    d_perf = predict_model_performance(hw)
+    pp = pprint.PrettyPrinter(indent=4)
+    print(f"Predicted Performance")
+    pp.pprint(d_perf)