diff --git a/deepsocflow/py/hardware.py b/deepsocflow/py/hardware.py
index c8d1f58..5b56553 100644
--- a/deepsocflow/py/hardware.py
+++ b/deepsocflow/py/hardware.py
@@ -226,9 +226,6 @@ def simulate(self, SIM='verilator', SIM_PATH=''):
         cmd = f'{SIM_PATH}verilator --binary -j 0 -O3 --relative-includes --top {self.TB_MODULE} -I../ -F ../sources.txt -CFLAGS -DSIM -CFLAGS -I../ {self.MODULE_DIR}/c/sim.c -CFLAGS -g --Mdir ./'
         print(cmd)
         assert subprocess.run(cmd.split(' '), cwd='build').returncode == 0
-
-        exit()
-
 
         print("\n\nSIMULATING...\n\n")
         start = time.time()
diff --git a/deepsocflow/py/xbundle.py b/deepsocflow/py/xbundle.py
index d3c5f82..18ceb41 100644
--- a/deepsocflow/py/xbundle.py
+++ b/deepsocflow/py/xbundle.py
@@ -47,6 +47,8 @@ def call(self, input_tensor, x_add=None, training=False):
             self.prev_ib = x.ib
             BUNDLES[self.prev_ib].next_ibs += [self.ib]
 
+        print(f"{self.ib} x: {x.shape}, prev:{self.prev_ib}")
+
         x = self.core(x)
         x = self.core.act(x)
 
diff --git a/deepsocflow/py/xmodel.py b/deepsocflow/py/xmodel.py
index 284f04f..4828ecd 100644
--- a/deepsocflow/py/xmodel.py
+++ b/deepsocflow/py/xmodel.py
@@ -39,7 +39,7 @@ def get_config(self):
 
 
 
-def export_inference(model, hw):
+def export_inference(model, hw, batch_size=1):
 
     for b in BUNDLES:
         b.next_ibs.clear()
@@ -47,7 +47,7 @@ def export_inference(model, hw):
     BUNDLES.clear()
 
     user_model = model.layers[1]
-    input_shape = (hw.ROWS, *model.inputs[0].shape[1:])
+    input_shape = (batch_size, *model.inputs[0].shape[1:])
     x_keras = tf.random.uniform(input_shape)
     x_qtensor = user_model.input_quant_layer(x_keras)
     out_keras = model(x_keras)
diff --git a/run/param_test.py b/run/param_test.py
index daea4e3..ecb87bd 100644
--- a/run/param_test.py
+++ b/run/param_test.py
@@ -11,6 +11,7 @@
 from keras.utils import to_categorical
 from qkeras.utils import load_qmodel
 import numpy as np
+import pprint
 # import tensorflow as tf
 #tf.keras.utils.set_random_seed(0)
 
@@ -175,24 +176,24 @@ def product_dict(**kwargs):
         yield dict(zip(kwargs.keys(), instance))
 
 @pytest.mark.parametrize("PARAMS", list(product_dict(
-        processing_elements = [(8,24) ],
+        processing_elements = [(32,32) ],
         frequency_mhz       = [ 250 ],
         bits_input          = [ 4 ],
         bits_weights        = [ 4 ],
-        bits_sum            = [ 32 ],
+        bits_sum            = [ 20 ],
         bits_bias           = [ 16 ],
         max_batch_size      = [ 64 ],
         max_channels_in     = [ 2048 ],
         max_kernel_size     = [ 9 ],
         max_image_size      = [ 512 ],
         max_n_bundles       = [ 64 ],
-        ram_weights_depth   = [ 20 ],
+        ram_weights_depth   = [ 512 ],
         ram_edges_depth     = [ 288 ],
-        axi_width           = [ 128 ],
+        axi_width           = [ 64 ],
         config_baseaddr     = ["B0000000"],
         target_cpu_int_bits = [ 32 ],
-        valid_prob          = [ 0.1 ],
-        ready_prob          = [ 0.01 ],
+        valid_prob          = [ 1 ],
+        ready_prob          = [ 1 ],
         data_dir            = ['vectors'],
     )))
 def test_dnn_engine(PARAMS):
@@ -210,9 +211,10 @@ def test_dnn_engine(PARAMS):
     '''
     VERIFY & EXPORT
     '''
-    export_inference(loaded_model, hw)
+    export_inference(loaded_model, hw, batch_size=1)
     verify_inference(loaded_model, hw, SIM=SIM, SIM_PATH=SIM_PATH)
 
-    seconds, bytes = predict_model_performance(hw)
-    print(f"Predicted time on hardware: {1000*seconds:.5f} ms")
-    print(f"Predicted data movement: {bytes/1000:.5f} kB")
+    d_perf = predict_model_performance(hw)
+    pp = pprint.PrettyPrinter(indent=4)
+    print(f"Predicted Performance")
+    pp.pprint(d_perf)
\ No newline at end of file
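Note: the xmodel.py and param_test.py hunks above change the user-facing flow in two ways: export_inference() now takes an explicit batch_size (previously the traced batch dimension was fixed to hw.ROWS), and the result of predict_model_performance() is pretty-printed instead of being unpacked as a (seconds, bytes) tuple. The fragment below is a minimal sketch of the updated flow, not part of the patch; it reuses names defined in run/param_test.py (PARAMS, loaded_model, SIM, SIM_PATH) and assumes predict_model_performance() returns a dict of predicted metrics.

    # Sketch only: mirrors the updated calls in run/param_test.py.
    import pprint
    from deepsocflow import *

    hw = Hardware(**PARAMS)                            # PARAMS as produced by product_dict(...)
    hw.export()                                        # generates config_hw.svh, config_hw.tcl

    export_inference(loaded_model, hw, batch_size=1)   # batch size is now explicit
    verify_inference(loaded_model, hw, SIM=SIM, SIM_PATH=SIM_PATH)

    d_perf = predict_model_performance(hw)             # assumed: dict of predicted metrics
    pprint.PrettyPrinter(indent=4).pprint(d_perf)
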
diff --git a/run/pointnet.py b/run/pointnet.py
new file mode 100644
index 0000000..f8448ae
--- /dev/null
+++ b/run/pointnet.py
@@ -0,0 +1,298 @@
+import os
+import pytest
+import itertools
+import sys
+sys.path.append("../../")
+from tensorflow import keras
+from keras.layers import Input
+from keras.models import Model, save_model
+from keras.datasets import mnist
+from keras.optimizers import Adam
+from keras.utils import to_categorical
+from qkeras.utils import load_qmodel
+import numpy as np
+import pprint
+#from read_point_cloud import *
+#from preprocess import *
+import tensorflow as tf
+#tf.keras.utils.set_random_seed(0)
+
+from deepsocflow import *
+
+
+(SIM, SIM_PATH) = ('xsim', "F:/Xilinx/Vivado/2022.2/bin/") if os.name=='nt' else ('verilator', '')
+np.random.seed(42)
+
+'''
+Dataset
+'''
+
+NB_EPOCH = 2
+BATCH_SIZE = 64
+VALIDATION_SPLIT = 0.1
+
+#input_shape = x_train.shape[1:]
+
+scale_factor = 80.
+## Load data
+"""
+print("loading data...")
+pmtxyz = get_pmtxyz("./work/pmt_xyz.dat")
+X, y = torch.load("./work/preprocessed_data.pt")
+X = X/100.
+y[:,:] = y[:,:]/3.0
+y[:, :3] = y[:, :3]/scale_factor
+y[:, :3] = y[:,:3]
+#print(y[0])
+X_tf = tf.convert_to_tensor(X.numpy(), dtype=tf.float32)
+y_tf = tf.convert_to_tensor(y.numpy(), dtype=tf.float32)
+X_tf = tf.expand_dims(X_tf, axis=2)
+debug = True
+if debug:
+    print("debug got called")
+    small = 5000
+    X_tf, y_tf = X_tf[:small], y_tf[:small]
+
+
+# Update batch size
+print(X_tf.shape)
+n_data, n_hits, _, F_dim = X_tf.shape
+
+## switch to match Aobo's syntax (time, charge, x, y, z) -> (x, y, z, label, time, charge)
+## insert "label" feature to tensor. This feature (0 or 1) is the activation of sensor
+new_X = X_tf #preprocess(X_tf)
+
+## Shuffle Data (w/ Seed)
+#np.random.seed(seed=args.seed)
+#set_seed(seed=args.seed)
+idx = np.random.permutation(new_X.shape[0])
+#new_X = tf.gather(new_X, idx)
+#y = tf.gather(y_tf, idx)
+## Split and Load data
+train_split = 0.7
+val_split = 0.3
+train_idx = int(new_X.shape[0] * train_split)
+val_idx = int(train_idx + new_X.shape[0] * train_split)
+train = tf.data.Dataset.from_tensor_slices((new_X[:train_idx], y_tf[:train_idx]))
+val = tf.data.Dataset.from_tensor_slices((new_X[train_idx:val_idx], y_tf[train_idx:val_idx]))
+test = tf.data.Dataset.from_tensor_slices((new_X[val_idx:], y_tf[val_idx:]))
+train_loader = train.shuffle(buffer_size=len(new_X)).batch(BATCH_SIZE)
+val_loader = val.batch(BATCH_SIZE)
+test_loader = val.batch(BATCH_SIZE)
+print(f"num. total: {len(new_X)} train: {len(train)}, val: {len(val)}, test: {len(test)}")
+#print(pmtxyz.shape, tf.shape(new_X), y_tf.shape)
+"""
+input_shape = (2126, 1, 5) #X_tf.shape[1:]
+n_hits, _, F_dim = input_shape #X_tf.shape
+
+'''
+Define Model
+'''
+
+sys_bits = SYS_BITS(x=8, k=8, b=16)
+dim = F_dim
+dim_reduce_factor = 2
+out_dim = 4 #y_tf.shape[-1]
+dimensions = dim
+nhits = 2126
+encoder_input_shapes = [dimensions, 64, int(128 / dim_reduce_factor)]
+(_, F1, F2), latent_dim = encoder_input_shapes, int(1024 / dim_reduce_factor)
+decoder_input_shapes = latent_dim, int(512/dim_reduce_factor), int(128/dim_reduce_factor)
+latent_dim, F3, F4 = decoder_input_shapes
+#print("Test", F1, F2, dim, dim_reduce_factor, out_dim, dimensions)
+
+@keras.saving.register_keras_serializable()
+class UserModel(XModel):
+    def __init__(self, sys_bits, x_int_bits, *args, **kwargs):
+        super().__init__(sys_bits, x_int_bits, *args, **kwargs)
+
+        self.b0 = XBundle(
+            core=XConvBN(
+                k_int_bits=0,
+                b_int_bits=0,
+                filters=F1,
+                kernel_size=1,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0),),
+            #core=XDense(
+            #    k_int_bits=0,
+            #    b_int_bits=0,
+            #    units=F1,
+            #    act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)
+            #    ),
+            )
+
+        self.b1 = XBundle(
+            core=XConvBN(
+                k_int_bits=0,
+                b_int_bits=0,
+                filters=F2,
+                kernel_size=1,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0),),
+            #core=XDense(
+            #    k_int_bits=0,
+            #    b_int_bits=0,
+            #    units=F2,
+            #    act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)),
+            )
+
+        self.b2 = XBundle(
+            core=XConvBN(
+                k_int_bits=0,
+                b_int_bits=0,
+                filters=latent_dim,
+                kernel_size=1,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0),),
+            pool=XPool(
+                type='avg',
+                pool_size=(2126,1),
+                strides=(2126,1),
+                padding='same',
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type=None),),
+            flatten=True
+            #core=XDense(
+            #    k_int_bits=0,
+            #    b_int_bits=0,
+            #    units=latent_dim,
+            #    act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)),
+            )
+
+
+        self.b3 = XBundle(
+            core=XDense(
+                k_int_bits=0,
+                b_int_bits=0,
+                units=F3,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)),
+            )
+
+        self.b4 = XBundle(
+            core=XDense(
+                k_int_bits=0,
+                b_int_bits=0,
+                units=F4,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)),
+            )
+
+        self.b5 = XBundle(
+            core=XDense(
+                k_int_bits=0,
+                b_int_bits=0,
+                units=out_dim,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0.125)),
+            # flatten=True
+            )
+
+    def call (self, x):
+        x = self.input_quant_layer(x)
+        print('input', x.shape)
+        x = self.b0(x)
+        x = self.b1(x)
+        x = self.b2(x)
+        x = self.b3(x)
+        x = self.b4(x)
+        x = self.b5(x)
+        return x
+
+x = x_in = Input(input_shape, name="input")
+user_model = UserModel(sys_bits=sys_bits, x_int_bits=0)
+x = user_model(x_in)
+
+model = Model(inputs=[x_in], outputs=[x])
+
+
+'''
+Train Model
+'''
+model.compile(loss="mse", optimizer=Adam(learning_rate=0.0001), metrics=["mse"])
+#history = model.fit(
+#    train_loader,
+#    #x_train,
+#    #y_train,
+#    batch_size=BATCH_SIZE,
+#    epochs=NB_EPOCH,
+#    #initial_epoch=1,
+#    verbose=True,
+#    )
+
+print(model.submodules)
+#print(y[:5], model(X_tf[:5]))
+for layer in model.submodules:
+    try:
+        print(layer.summary())
+        for w, weight in enumerate(layer.get_weights()):
+            print(layer.name, w, weight.shape)
+    except:
+        pass
+# print_qstats(model.layers[1])
+
+def summary_plus(layer, i=0):
+    if hasattr(layer, 'layers'):
+        if i != 0:
+            layer.summary()
+        for l in layer.layers:
+            i += 1
+            summary_plus(l, i=i)
+
+print(summary_plus(model)) # OK
+model.summary(expand_nested=True)
+
+
+'''
+Save & Reload
+'''
+
+save_model(model, "mnist.h5")
+loaded_model = load_qmodel("mnist.h5")
+
+#score = loaded_model.evaluate(test_loader, verbose=0)
+#print(f"Test loss:{score[0]}, Test accuracy:{score[1]}")
+
+
+
+
+def product_dict(**kwargs):
+    for instance in itertools.product(*(kwargs.values())):
+        yield dict(zip(kwargs.keys(), instance))
+
+@pytest.mark.parametrize("PARAMS", list(product_dict(
+        processing_elements = [(16,32) ],
+        frequency_mhz       = [ 250 ],
+        bits_input          = [ 8 ],
+        bits_weights        = [ 8 ],
+        bits_sum            = [ 32 ],
+        bits_bias           = [ 16 ],
+        max_batch_size      = [ 64 ],
+        max_channels_in     = [ 2048 ],
+        max_kernel_size     = [ 9 ],
+        max_image_size      = [ 2126 ],
+        max_n_bundles       = [ 64 ],
+        ram_weights_depth   = [ 20 ],
+        ram_edges_depth     = [ 288 ],
+        axi_width           = [ 128 ],
+        config_baseaddr     = ["B0000000"],
+        target_cpu_int_bits = [ 32 ],
+        valid_prob          = [ 1 ],
+        ready_prob          = [ 1 ],
+        data_dir            = ['vectors'],
+    )))
+def test_dnn_engine(PARAMS):
+
+    '''
+    SPECIFY HARDWARE
+    '''
+    hw = Hardware (**PARAMS)
+    hw.export_json()
+    hw = Hardware.from_json('hardware.json')
+    hw.export() # Generates: config_hw.svh, config_hw.tcl
+    hw.export_vivado_tcl(board='zcu104')
+
+
+    '''
+    VERIFY & EXPORT
+    '''
+    export_inference(loaded_model, hw, hw.ROWS)
+    verify_inference(loaded_model, hw, SIM=SIM, SIM_PATH=SIM_PATH)
+
+    d_perf = predict_model_performance(hw)
+    pp = pprint.PrettyPrinter(indent=4)
+    print(f"Predicted Performance")
+    pp.pprint(d_perf)
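Note: the new run/pointnet.py above builds a PointNet-style regressor out of deepsocflow bundles: the shared per-hit MLP is expressed as 1x1 XConvBN bundles over a (2126, 1, 5) tensor, the hit axis is collapsed by an average XPool with pool_size=(2126, 1), and three XDense bundles regress the 4 outputs. For shape intuition only, a rough float-Keras equivalent is sketched below; it is an illustration, not part of the patch, and the quantized X* wrappers, batch norm and the slope-0.125 output activation are intentionally omitted.

    # Rough float-Keras equivalent of the model in run/pointnet.py (shapes only).
    # Plain Conv2D/Dense stand in for the quantized XConvBN/XDense bundles.
    from tensorflow import keras
    from keras import layers

    n_hits, out_dim, dim_reduce_factor = 2126, 4, 2
    F1, F2 = 64, int(128 / dim_reduce_factor)
    latent_dim = int(1024 / dim_reduce_factor)
    F3, F4 = int(512 / dim_reduce_factor), int(128 / dim_reduce_factor)

    x_in = keras.Input((n_hits, 1, 5))                      # (hits, 1, features), as in X_tf
    x = layers.Conv2D(F1, 1, activation='relu')(x_in)       # b0: shared per-hit MLP as 1x1 conv
    x = layers.Conv2D(F2, 1, activation='relu')(x)          # b1
    x = layers.Conv2D(latent_dim, 1, activation='relu')(x)  # b2 core
    x = layers.AveragePooling2D((n_hits, 1))(x)             # b2 pool: collapse the hit axis
    x = layers.Flatten()(x)                                 # b2 flatten -> (latent_dim,)
    x = layers.Dense(F3, activation='relu')(x)              # b3
    x = layers.Dense(F4, activation='relu')(x)              # b4
    x_out = layers.Dense(out_dim)(x)                        # b5: 4-value regression head
    keras.Model(x_in, x_out).summary()
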
diff --git a/run/resnet50.py b/run/resnet50.py
index aa8d9f5..c8f0b1a 100644
--- a/run/resnet50.py
+++ b/run/resnet50.py
@@ -11,6 +11,7 @@
 from keras.utils import to_categorical
 from qkeras.utils import load_qmodel
 import numpy as np
+import pprint
 # import tensorflow as tf
 #tf.keras.utils.set_random_seed(0)
 
@@ -404,60 +405,61 @@ def __init__(self, sys_bits, x_int_bits, *args, **kwargs):
 
     def call (self, x):
         x = self.input_quant_layer(x)
-        x = self.b1(x)
-        x_skip1 = self.sk1(x)
-        x = self.b2(x)
-        x = self.b3(x)
-        x = x_skip2 = self.sk2(x, x_skip1)
-        x = self.b4(x)
-        x = self.b5(x)
-        x = x_skip3 = self.sk3(x, x_skip2)
-        x = self.b6(x)
-        x = self.b7(x)
-        x = x_skip4 = self.sk4(x, x_skip3)
-        x_skip5 = self.sk5(x)
-        x = self.b8(x)
-        x = self.b9(x)
-        x = x_skip6 = self.sk6(x, x_skip5)
-        x = self.b10(x)
-        x = self.b11(x)
-        x = x_skip7 = self.sk7(x, x_skip6)
-        x = self.b12(x)
-        x = self.b13(x)
-        x = x_skip8 = self.sk8(x, x_skip7)
-        x = self.b14(x)
-        x = self.b15(x)
-        x = x_skip9 = self.sk9(x, x_skip8)
-        x_skip10 = self.sk10(x)
-        x = self.b16(x)
-        x = self.b17(x)
-        x = x_skip11 = self.sk11(x, x_skip10)
-        x = self.b18(x)
-        x = self.b19(x)
-        x = x_skip12 = self.sk12(x, x_skip11)
-        x = self.b20(x)
-        x = self.b21(x)
-        x = x_skip13 = self.sk13(x, x_skip12)
-        x = self.b22(x)
-        x = self.b23(x)
-        x = x_skip14 = self.sk14(x, x_skip13)
-        x = self.b24(x)
-        x = self.b25(x)
-        x = x_skip15 = self.sk15(x, x_skip14)
-        x = self.b26(x)
-        x = self.b27(x)
-        x = x_skip16 = self.sk16(x, x_skip15)
-        x_skip17 = self.sk17(x)
-        x = self.b28(x)
-        x = self.b29(x)
-        x = x_skip18 = self.sk18(x, x_skip17)
-        x = self.b30(x)
-        x = self.b31(x)
-        x = x_skip19 = self.sk19(x, x_skip18)
-        x = self.b32(x)
-        x = self.b33(x)
-        x = x_skip20 = self.sk20(x, x_skip19)
-        x = self.b34(x)
+        x = self.b1(x) # 0
+        x_skip1 = self.sk1(x) # 1
+        x = self.b2(x) # 2
+        x = self.b3(x) # 3
+        x = x_skip2 = self.sk2(x, x_skip1) # 4
+        x = self.b4(x) # 5
+        x = self.b5(x) # 6
+        x = x_skip3 = self.sk3(x, x_skip2) # 7
+        x = self.b6(x) # 8
+        x = self.b7(x) # 9
+        x = x_skip4 = self.sk4(x, x_skip3) # 10
+        x_skip5 = self.sk5(x) # 11
+        x = self.b8(x) # 12
+        x = self.b9(x) # 13
+        x = x_skip6 = self.sk6(x, x_skip5) # 14
+        x = self.b10(x) # 15
+        x = self.b11(x) # 16
+        x = x_skip7 = self.sk7(x, x_skip6) # 17
+        x = self.b12(x) # 18
+        x = self.b13(x) # 19
+        x = x_skip8 = self.sk8(x, x_skip7) # 20
+        x = self.b14(x) # 21
+        x = self.b15(x) # 22
+        x = x_skip9 = self.sk9(x, x_skip8) # 23
+        x_skip10 = self.sk10(x) # 24
+        x = self.b16(x) # 25
+        x = self.b17(x) # 26
+        x = x_skip11 = self.sk11(x, x_skip10) # 27
+        x = self.b18(x) # 28
+        x = self.b19(x) # 29
+        x = x_skip12 = self.sk12(x, x_skip11) # 30
+        x = self.b20(x) # 31
+        x = self.b21(x) # 32
+        x = x_skip13 = self.sk13(x, x_skip12) # 33
+        x = self.b22(x) # 34
+        x = self.b23(x) # 35
+        x = x_skip14 = self.sk14(x, x_skip13) # 36
+        x = self.b24(x) # 37
+        x = self.b25(x) # 38
+        x = x_skip15 = self.sk15(x, x_skip14) # 39
+        x = self.b26(x) # 40
+        x = self.b27(x) # 41
+        x = x_skip16 = self.sk16(x, x_skip15) # 42
+        x_skip17 = self.sk17(x) # 43
+        x = self.b28(x) # 44
+        x = self.b29(x) # 45
+        x = x_skip18 = self.sk18(x, x_skip17) # 46
+        x = self.b30(x) # 47
+        x = self.b31(x) # 48
+        x = x_skip19 = self.sk19(x, x_skip18) # 49
+        x = self.b32(x) # 50
+        x = self.b33(x) # 51
+        x = x_skip20 = self.sk20(x, x_skip19) # 52
+        x = self.b34(x) # 53
+        exit()
         return x
 
 x = x_in = Input(input_shape, name="input")
@@ -537,8 +539,8 @@ def product_dict(**kwargs):
         axi_width           = [ 128 ],
         config_baseaddr     = ["B0000000"],
         target_cpu_int_bits = [ 32 ],
-        valid_prob          = [ 0.1 ],
-        ready_prob          = [ 0.01 ],
+        valid_prob          = [ 1 ],
+        ready_prob          = [ 1 ],
         data_dir            = ['vectors'],
     )))
 def test_dnn_engine(PARAMS):
@@ -556,9 +558,10 @@ def test_dnn_engine(PARAMS):
     '''
     VERIFY & EXPORT
     '''
-    export_inference(loaded_model, hw)
+    export_inference(loaded_model, hw, batch_size=1)
     verify_inference(loaded_model, hw, SIM=SIM, SIM_PATH=SIM_PATH)
 
-    seconds, bytes = predict_model_performance(hw)
-    print(f"Predicted time on hardware: {1000*seconds:.5f} ms")
-    print(f"Predicted data movement: {bytes/1000:.5f} kB")
+    d_perf = predict_model_performance(hw)
+    pp = pprint.PrettyPrinter(indent=4)
+    print(f"Predicted Performance")
+    pp.pprint(d_perf)
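Note: both run/param_test.py and run/pointnet.py drive test_dnn_engine through the same product_dict helper, which expands every combination of the listed hardware parameters into one PARAMS dict per pytest case. A tiny standalone illustration is below; the parameter values chosen here are arbitrary and only demonstrate the expansion.

    import itertools

    def product_dict(**kwargs):
        # same helper as in the run/ scripts: cartesian product over the value lists
        for instance in itertools.product(*(kwargs.values())):
            yield dict(zip(kwargs.keys(), instance))

    # two PE geometries x two AXI widths -> four PARAMS dicts, i.e. four test cases
    for params in product_dict(processing_elements=[(16, 32), (32, 32)], axi_width=[64, 128]):
        print(params)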