Skip to content

Commit

Permalink
Pointnet works
Browse files Browse the repository at this point in the history
  • Loading branch information
Aba committed Aug 1, 2024
1 parent 25c6d9b commit c7f7889
Show file tree
Hide file tree
Showing 6 changed files with 377 additions and 75 deletions.
3 changes: 0 additions & 3 deletions deepsocflow/py/hardware.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,6 @@ def simulate(self, SIM='verilator', SIM_PATH=''):
cmd = f'{SIM_PATH}verilator --binary -j 0 -O3 --relative-includes --top {self.TB_MODULE} -I../ -F ../sources.txt -CFLAGS -DSIM -CFLAGS -I../ {self.MODULE_DIR}/c/sim.c -CFLAGS -g --Mdir ./'
print(cmd)
assert subprocess.run(cmd.split(' '), cwd='build').returncode == 0

exit()

print("\n\nSIMULATING...\n\n")
start = time.time()

Expand Down
2 changes: 2 additions & 0 deletions deepsocflow/py/xbundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def call(self, input_tensor, x_add=None, training=False):
self.prev_ib = x.ib
BUNDLES[self.prev_ib].next_ibs += [self.ib]

print(f"{self.ib} x: {x.shape}, prev:{self.prev_ib}")

x = self.core(x)
x = self.core.act(x)

Expand Down
4 changes: 2 additions & 2 deletions deepsocflow/py/xmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ def get_config(self):



def export_inference(model, hw):
def export_inference(model, hw, batch_size=1):

for b in BUNDLES:
b.next_ibs.clear()
b.next_add_ibs.clear()
BUNDLES.clear()

user_model = model.layers[1]
input_shape = (hw.ROWS, *model.inputs[0].shape[1:])
input_shape = (batch_size, *model.inputs[0].shape[1:])
x_keras = tf.random.uniform(input_shape)
x_qtensor = user_model.input_quant_layer(x_keras)
out_keras = model(x_keras)
Expand Down
22 changes: 12 additions & 10 deletions run/param_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from keras.utils import to_categorical
from qkeras.utils import load_qmodel
import numpy as np
import pprint
# import tensorflow as tf
#tf.keras.utils.set_random_seed(0)

Expand Down Expand Up @@ -175,24 +176,24 @@ def product_dict(**kwargs):
yield dict(zip(kwargs.keys(), instance))

@pytest.mark.parametrize("PARAMS", list(product_dict(
processing_elements = [(8,24) ],
processing_elements = [(32,32) ],
frequency_mhz = [ 250 ],
bits_input = [ 4 ],
bits_weights = [ 4 ],
bits_sum = [ 32 ],
bits_sum = [ 20 ],
bits_bias = [ 16 ],
max_batch_size = [ 64 ],
max_channels_in = [ 2048 ],
max_kernel_size = [ 9 ],
max_image_size = [ 512 ],
max_n_bundles = [ 64 ],
ram_weights_depth = [ 20 ],
ram_weights_depth = [ 512 ],
ram_edges_depth = [ 288 ],
axi_width = [ 128 ],
axi_width = [ 64 ],
config_baseaddr = ["B0000000"],
target_cpu_int_bits = [ 32 ],
valid_prob = [ 0.1 ],
ready_prob = [ 0.01 ],
valid_prob = [ 1 ],
ready_prob = [ 1 ],
data_dir = ['vectors'],
)))
def test_dnn_engine(PARAMS):
Expand All @@ -210,9 +211,10 @@ def test_dnn_engine(PARAMS):
'''
VERIFY & EXPORT
'''
export_inference(loaded_model, hw)
export_inference(loaded_model, hw, batch_size=1)
verify_inference(loaded_model, hw, SIM=SIM, SIM_PATH=SIM_PATH)

seconds, bytes = predict_model_performance(hw)
print(f"Predicted time on hardware: {1000*seconds:.5f} ms")
print(f"Predicted data movement: {bytes/1000:.5f} kB")
d_perf = predict_model_performance(hw)
pp = pprint.PrettyPrinter(indent=4)
print(f"Predicted Performance")
pp.pprint(d_perf)
298 changes: 298 additions & 0 deletions run/pointnet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,298 @@
import os
import pytest
import itertools
import sys
sys.path.append("../../")
from tensorflow import keras
from keras.layers import Input
from keras.models import Model, save_model
from keras.datasets import mnist
from keras.optimizers import Adam
from keras.utils import to_categorical
from qkeras.utils import load_qmodel
import numpy as np
import pprint
#from read_point_cloud import *
#from preprocess import *
import tensorflow as tf
#tf.keras.utils.set_random_seed(0)

from deepsocflow import *


# Choose the RTL simulator from the host OS: 'xsim' (with an explicit Vivado
# bin directory) on Windows, 'verilator' resolved from PATH everywhere else.
if os.name == 'nt':
    SIM, SIM_PATH = 'xsim', "F:/Xilinx/Vivado/2022.2/bin/"
else:
    SIM, SIM_PATH = 'verilator', ''

# Seed NumPy's global RNG so repeated runs draw the same random numbers.
np.random.seed(42)

'''
Dataset
'''

# Training hyper-parameters. In the visible file they are referenced only by
# the disabled data-loading / model.fit() code.
NB_EPOCH = 2
BATCH_SIZE = 64
VALIDATION_SPLIT = 0.1

#input_shape = x_train.shape[1:]

# Divisor applied to the first three target columns by the disabled loader.
scale_factor = 80.
## Load data
# The triple-quoted block below is a bare string literal, so none of it is
# executed. It preserves the original data pipeline: torch-saved tensors are
# converted to TensorFlow tensors and split into train/val/test tf.data loaders.
#
# NOTE(review): before re-enabling, check the following in the disabled code:
#   * `torch` and `get_pmtxyz` are used, but neither is imported in this file
#     (the `read_point_cloud` import is commented out).
#   * `val_idx` is computed with `train_split` a second time, while `val_split`
#     is never read -- presumably `val_split` was intended; confirm.
#   * `test_loader` is built from `val`, not `test` -- presumably a typo; confirm.
"""
print("loading data...")
pmtxyz = get_pmtxyz("./work/pmt_xyz.dat")
X, y = torch.load("./work/preprocessed_data.pt")
X = X/100.
y[:,:] = y[:,:]/3.0
y[:, :3] = y[:, :3]/scale_factor
y[:, :3] = y[:,:3]
#print(y[0])
X_tf = tf.convert_to_tensor(X.numpy(), dtype=tf.float32)
y_tf = tf.convert_to_tensor(y.numpy(), dtype=tf.float32)
X_tf = tf.expand_dims(X_tf, axis=2)
debug = True
if debug:
print("debug got called")
small = 5000
X_tf, y_tf = X_tf[:small], y_tf[:small]
# Update batch size
print(X_tf.shape)
n_data, n_hits, _, F_dim = X_tf.shape
## switch to match Aobo's syntax (time, charge, x, y, z) -> (x, y, z, label, time, charge)
## insert "label" feature to tensor. This feature (0 or 1) is the activation of sensor
new_X = X_tf #preprocess(X_tf)
## Shuffle Data (w/ Seed)
#np.random.seed(seed=args.seed)
#set_seed(seed=args.seed)
idx = np.random.permutation(new_X.shape[0])
#new_X = tf.gather(new_X, idx)
#y = tf.gather(y_tf, idx)
## Split and Load data
train_split = 0.7
val_split = 0.3
train_idx = int(new_X.shape[0] * train_split)
val_idx = int(train_idx + new_X.shape[0] * train_split)
train = tf.data.Dataset.from_tensor_slices((new_X[:train_idx], y_tf[:train_idx]))
val = tf.data.Dataset.from_tensor_slices((new_X[train_idx:val_idx], y_tf[train_idx:val_idx]))
test = tf.data.Dataset.from_tensor_slices((new_X[val_idx:], y_tf[val_idx:]))
train_loader = train.shuffle(buffer_size=len(new_X)).batch(BATCH_SIZE)
val_loader = val.batch(BATCH_SIZE)
test_loader = val.batch(BATCH_SIZE)
print(f"num. total: {len(new_X)} train: {len(train)}, val: {len(val)}, test: {len(test)}")
#print(pmtxyz.shape, tf.shape(new_X), y_tf.shape)
"""
# Per-sample input shape, hard-coded because the dataset loader is disabled
# (it would otherwise be X_tf.shape[1:]).
input_shape = (2126, 1, 5)
n_hits, _, F_dim = input_shape

'''
Define Model
'''

sys_bits = SYS_BITS(x=8, k=8, b=16)

# Feature dimension of the input, under the two names the rest of the file uses.
dim = dimensions = F_dim
dim_reduce_factor = 2  # every nominal layer width below is divided by this
out_dim = 4  # y_tf.shape[-1]
nhits = 2126

# Layer widths: F1, F2, latent_dim for the conv bundles; F3, F4 for the dense ones.
F1 = 64
F2 = int(128 / dim_reduce_factor)
latent_dim = int(1024 / dim_reduce_factor)
F3 = int(512 / dim_reduce_factor)
F4 = int(128 / dim_reduce_factor)

encoder_input_shapes = [dimensions, F1, F2]
decoder_input_shapes = (latent_dim, F3, F4)
#print("Test", F1, F2, dim, dim_reduce_factor, out_dim, dimensions)
@keras.saving.register_keras_serializable()
class UserModel(XModel):
    """Six-bundle quantized model used by the PointNet hardware test.

    Layer widths come from the module-level constants F1, F2, latent_dim,
    F3, F4 and out_dim; the pooling window comes from n_hits.

    Bundles, in call order:
      * b0, b1 -- XConvBN with kernel_size=1 and ReLU (F1, F2 filters).
      * b2     -- XConvBN with kernel_size=1 and ReLU (latent_dim filters),
                  then average pooling over an (n_hits, 1) window, then flatten.
      * b3, b4 -- XDense with ReLU (F3, F4 units).
      * b5     -- XDense with out_dim units and type='relu', slope=0.125.

    NOTE(review): earlier revisions carried commented-out XDense variants of
    b0-b2; the kernel_size=1 convolutions presumably stand in for a dense
    layer applied to every point -- confirm with the author.

    Args:
        sys_bits: SYS_BITS configuration, forwarded to XModel and reused for
            every XActivation.
        x_int_bits: forwarded unchanged to XModel.
        *args, **kwargs: forwarded unchanged to XModel.
    """

    def __init__(self, sys_bits, x_int_bits, *args, **kwargs):
        super().__init__(sys_bits, x_int_bits, *args, **kwargs)

        self.b0 = XBundle(
            core=XConvBN(
                k_int_bits=0,
                b_int_bits=0,
                filters=F1,
                kernel_size=1,
                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0),),
        )

        self.b1 = XBundle(
            core=XConvBN(
                k_int_bits=0,
                b_int_bits=0,
                filters=F2,
                kernel_size=1,
                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0),),
        )

        self.b2 = XBundle(
            core=XConvBN(
                k_int_bits=0,
                b_int_bits=0,
                filters=latent_dim,
                kernel_size=1,
                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0),),
            # The pooling window spans the whole first input axis. It is tied
            # to n_hits (= input_shape[0]) instead of a repeated literal so it
            # cannot drift out of sync with the declared input shape.
            pool=XPool(
                type='avg',
                pool_size=(n_hits, 1),
                strides=(n_hits, 1),
                padding='same',
                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type=None),),
            flatten=True
        )

        self.b3 = XBundle(
            core=XDense(
                k_int_bits=0,
                b_int_bits=0,
                units=F3,
                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)),
        )

        self.b4 = XBundle(
            core=XDense(
                k_int_bits=0,
                b_int_bits=0,
                units=F4,
                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)),
        )

        self.b5 = XBundle(
            core=XDense(
                k_int_bits=0,
                b_int_bits=0,
                units=out_dim,
                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0.125)),
        )

    def call (self, x):
        """Quantize the input, then run it through bundles b0..b5 in order."""
        x = self.input_quant_layer(x)
        print('input', x.shape)
        x = self.b0(x)
        x = self.b1(x)
        x = self.b2(x)
        x = self.b3(x)
        x = self.b4(x)
        x = self.b5(x)
        return x

# Wrap the user model in a functional Keras Model with one named input.
x_in = Input(input_shape, name="input")
user_model = UserModel(sys_bits=sys_bits, x_int_bits=0)
x = user_model(x_in)

model = Model(inputs=[x_in], outputs=[x])


'''
Train Model
'''
# Only compilation happens here; the model.fit() call that follows in the file
# is commented out, so the weights keep their initial values.
optimizer = Adam(learning_rate=0.0001)
model.compile(loss="mse", optimizer=optimizer, metrics=["mse"])
#history = model.fit(
# train_loader,
# #x_train,
# #y_train,
# batch_size=BATCH_SIZE,
# epochs=NB_EPOCH,
# #initial_epoch=1,
# verbose=True,
# )

# Debug dump: list every submodule tracked by the model, then print a summary
# and the weight shapes of each one that supports it.
print(model.submodules)
#print(y[:5], model(X_tf[:5]))
for layer in model.submodules:
    try:
        # summary() prints its own report; wrapping it in print() only added
        # a stray "None" line after each summary.
        layer.summary()
        for w, weight in enumerate(layer.get_weights()):
            print(layer.name, w, weight.shape)
    except Exception:
        # Best-effort: skip submodules where summary()/get_weights() is
        # missing or fails. `except Exception` (rather than a bare `except`)
        # still lets KeyboardInterrupt and SystemExit through.
        continue
# print_qstats(model.layers[1])

def summary_plus(layer, i=0):
    """Recursively print summaries of the containers nested inside *layer*.

    An object is treated as a container if it has a ``layers`` attribute.
    ``summary()`` is called on a container only when ``i`` is non-zero, so the
    object passed by the initial caller (default ``i=0``) is skipped. Each
    child is visited with a counter that grows by one per sibling, starting
    at ``i + 1``. Objects without ``layers`` are ignored.

    Returns None; all output comes from the ``summary()`` calls.
    """
    if not hasattr(layer, 'layers'):
        return
    if i != 0:
        layer.summary()
    for counter, child in enumerate(layer.layers, start=i + 1):
        summary_plus(child, i=counter)

# summary_plus() prints as a side effect and returns None, so it is called
# directly; wrapping it in print() only emitted a stray "None".
summary_plus(model)  # OK
model.summary(expand_nested=True)


'''
Save & Reload
'''

# Round-trip the model through an HDF5 file so the hardware test below runs on
# the reloaded model (load_qmodel) rather than the in-memory one.
# NOTE(review): "mnist.h5" looks inherited from an MNIST example; the name is
# kept so existing artifacts and scripts keep working -- consider renaming.
save_model(model, "mnist.h5")
loaded_model = load_qmodel("mnist.h5")

#score = loaded_model.evaluate(test_loader, verbose=0)
#print(f"Test loss:{score[0]}, Test accuracy:{score[1]}")




def product_dict(**kwargs):
    """Yield one dict per point of the Cartesian product of the keyword values.

    Each keyword maps to an iterable of candidate values. Every yielded dict
    has the same keys as ``kwargs``, in the same order, with one candidate
    chosen per key. With no keywords a single empty dict is yielded; if any
    iterable is empty nothing is yielded.
    """
    names = tuple(kwargs)
    for combo in itertools.product(*kwargs.values()):
        yield dict(zip(names, combo))

# Hardware parameter grid for the PointNet test. Every entry is a list of
# candidate values; product_dict() expands the grid into one PARAMS dict per
# combination (a single combination here, since every list has one element).
_POINTNET_HW_GRID = dict(
    processing_elements  = [(16,32)   ],
    frequency_mhz        = [ 250      ],
    bits_input           = [ 8        ],
    bits_weights         = [ 8        ],
    bits_sum             = [ 32       ],
    bits_bias            = [ 16       ],
    max_batch_size       = [ 64       ],
    max_channels_in      = [ 2048     ],
    max_kernel_size      = [ 9        ],
    max_image_size       = [ 2126     ],
    max_n_bundles        = [ 64       ],
    ram_weights_depth    = [ 20       ],
    ram_edges_depth      = [ 288      ],
    axi_width            = [ 128      ],
    config_baseaddr      = ["B0000000"],
    target_cpu_int_bits  = [ 32       ],
    valid_prob           = [ 1        ],
    ready_prob           = [ 1        ],
    data_dir             = ['vectors' ],
)


@pytest.mark.parametrize("PARAMS", list(product_dict(**_POINTNET_HW_GRID)))
def test_dnn_engine(PARAMS):
    """Export the reloaded model for one hardware configuration and verify it.

    Steps: build a Hardware from PARAMS, export it to JSON and rebuild it from
    'hardware.json', emit the hardware config files, export the model's
    inference data with batch size hw.ROWS, run verification with the selected
    simulator, then print the predicted performance.

    Args:
        PARAMS: keyword arguments for Hardware(), one combination of the grid.
    """
    # --- SPECIFY HARDWARE ---
    hw = Hardware(**PARAMS)
    hw.export_json()
    # Rebuild from 'hardware.json' (presumably the file export_json() just
    # wrote -- confirm against Hardware).
    hw = Hardware.from_json('hardware.json')
    hw.export()  # Generates: config_hw.svh, config_hw.tcl
    hw.export_vivado_tcl(board='zcu104')

    # --- VERIFY & EXPORT ---
    export_inference(loaded_model, hw, batch_size=hw.ROWS)
    verify_inference(loaded_model, hw, SIM=SIM, SIM_PATH=SIM_PATH)

    d_perf = predict_model_performance(hw)
    print("Predicted Performance")
    pprint.pprint(d_perf, indent=4)
Loading

0 comments on commit c7f7889

Please sign in to comment.