Skip to content

Commit

Permalink
Updated API
Browse files Browse the repository at this point in the history
  • Loading branch information
Aba committed Nov 20, 2023
1 parent 8d0dd35 commit 9c4e77e
Show file tree
Hide file tree
Showing 11 changed files with 495 additions and 380 deletions.
100 changes: 56 additions & 44 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,74 +15,86 @@ We present a highly flexible, high performance accelerator system that can be ad

![System](docs/overall.png)

## User API (WIP)
## User API

```py
from deepsocflow import Hardware, Bundle, QInput, BundleModel, QConvCore, QDenseCore, QAdd, QPool, Softmax, QLeakyReLu
from deepsocflow import Bundle, Hardware, QModel, QInput

'''
0. Specify Hardware
'''
hw = Hardware (
processing_elements = (8, 96),
frequency = 1000,
bits_input = 8,
bits_weights = 4,
bits_sum = 24,
bits_bias = 16,
max_kernel_size = (13, 13),
max_channels_in = 512,
max_channels_out = 512,
max_image_size = (32,32),
hw = Hardware ( # Alternatively: hw = Hardware.from_json('hardware.json')
processing_elements = (8, 96) , # (rows, columns) of multiply-add units
frequency_mhz = 250 , #
bits_input = 4 , # bit width of input pixels and activations
bits_weights = 4 , # bit width of weights
bits_sum = 16 , # bit width of accumulator
bits_bias = 16 , # bit width of bias
max_batch_size = 64 , #
max_channels_in = 2048 , #
max_kernel_size = 13 , #
max_image_size = 512 , #
ram_weights_depth = 20 , #
ram_edges_depth = 288 , #
axi_width = 64 , #
target_cpu_int_bits = 32 , #
valid_prob = 0.1 , # probability with which the AXI-Stream s_valid signal is toggled in simulation
ready_prob = 0.1 , # probability with which the AXI-Stream m_ready signal is toggled in simulation
data_dir = 'vectors', # directory to store generated test vectors
)
hw.export() # Generates: config_hw.svh, config_hw.tcl, config_hw.json
# Alternatively: hw = Hardware.from_json('config_hw.json')
hw.export() # Generates: config_hw.svh, config_hw.tcl, config_tb.svh, hardware.json
hw.export_vivado_tcl(board='zcu104')


'''
1. Build Model
'''
x = QInput( input_shape= (8,32,32,3), hw= hw, input_frac_bits= 4)

x = Bundle( core= QConvCore(filters= 32, kernel_size= (7,7), strides= (2,2), padding= 'same', weights_frac_bits= 4, bias_frac_bits= 8, activation= QLeakyReLu(negative_slope=0.125, frac_bits= 4 )),
pool= QPool(type= 'max', size= (3,3), strides= (1,1), padding= 'same', frac_bits= 4)
)(x)
x_skip = x
x = Bundle( core= QConvCore(filters= 64, kernel_size= (3,3), weights_frac_bits= 4, bias_frac_bits= 8, activation= QLeakyReLu(negative_slope=0, frac_bits= 4)),
pool= QAdd(x_skip), # Residual addition
flatten= True,
)(x)
x = Bundle( dense= QDenseCore(outputs= 10, weights_frac_bits= 4, bias_frac_bits= 8, activation= Softmax()),
)(x)
model = BundleModel(inputs=x_in, outputs=x)
# Alternatively: model = BundleModel.from_json('config_model.json')
XN = 1
input_shape = (XN,18,18,3) # (XN, XH, XW, CI)

QINT_BITS = 0
kq = f'quantized_bits({hw.K_BITS},{QINT_BITS},False,True,1)'
bq = f'quantized_bits({hw.B_BITS},{QINT_BITS},False,True,1)'
q1 = f'quantized_relu({hw.X_BITS},{QINT_BITS},negative_slope=0)'
q2 = f'quantized_bits({hw.X_BITS},{QINT_BITS},False,False,1)'
q3 = f'quantized_bits({hw.X_BITS},{QINT_BITS},False,True,1)'
q4 = f'quantized_relu({hw.X_BITS},{QINT_BITS},negative_slope=0.125)'

x = x_in = QInput(shape=input_shape[1:], batch_size=XN, hw=hw, int_bits=QINT_BITS, name='input')

x = x_skip1 = Bundle( core= {'type':'conv' , 'filters':8 , 'kernel_size':(11,11), 'strides':(2,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':q1}, pool= {'type':'avg', 'size':(3,4), 'strides':(2,3), 'padding':'same', 'act_str':f'quantized_bits({hw.X_BITS},0,False,False,1)'})(x)
x = x_skip2 = Bundle( core= {'type':'conv' , 'filters':8 , 'kernel_size':( 1, 1), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':q2}, add = {'act_str':f'quantized_bits({hw.X_BITS},0,False,True,1)'})(x, x_skip1)
x = Bundle( core= {'type':'conv' , 'filters':8 , 'kernel_size':( 7, 7), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':False, 'act_str':q3}, add = {'act_str':f'quantized_bits({hw.X_BITS},0,False,True,1)'})(x, x_skip2)
x = Bundle( core= {'type':'conv' , 'filters':8 , 'kernel_size':( 5, 5), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':q4}, add = {'act_str':f'quantized_bits({hw.X_BITS},0,False,True,1)'})(x, x_skip1)
x = Bundle( core= {'type':'conv' , 'filters':24, 'kernel_size':( 3, 3), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':q1},)(x)
x = Bundle( core= {'type':'conv' , 'filters':10, 'kernel_size':( 1, 1), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':q4}, flatten= True)(x)
x = Bundle( core= {'type':'dense', 'units' :10, 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':q4})(x)

model = QModel(inputs=x_in.raw, outputs=x)
model.compile()
model.summary()

'''
2. TRAIN (using qkeras)
'''
model.compile(...)
model.fit(...)
model.export() # Generates: savedmodel, config_model.json
# model.fit(...)


'''
3. EXPORT FOR INFERENCE
- Runs forward pass in float32, records intermediate tensors
- Runs forward pass in integer, comparing with float32 pass for zero error
- Runs SystemVerilog testbench with the model & weights, randomizing handshakes, testing with actual C firmware in simulation
- Prints performance estimate (time, latency)
- Generates
- config_firmware.h
- weights.bin
- expected.bin
'''
model.export_inference(x=model.random_input) # -> config_firmware.h, weights.bin
SIM, SIM_PATH = 'xsim', "F:/Xilinx/Vivado/2022.1/bin/" # For Xilinx Vivado
# SIM, SIM_PATH = 'verilator', "" # For Verilator

model.export_inference(x=model.random_input, hw=hw) # Runs forward pass in float & int, compares them. Generates: config_fw.h (C firmware), weights.bin, expected.bin
model.verify_inference(SIM=SIM, SIM_PATH=SIM_PATH) # Runs SystemVerilog testbench with the model & weights, randomizing handshakes, testing with actual C firmware in simulation

'''
4. IMPLEMENTATION
a. FPGA: Run vivado.tcl
a. FPGA: Open Vivado, source vivado_flow.tcl
b. ASIC: Set PDK paths, run syn.tcl & pnr.tcl
c. Compile C firmware with generated header (model.h) and run on device
c. Compile C firmware with generated header (config_fw.h) and run on device
'''
```

Expand Down
4 changes: 3 additions & 1 deletion deepsocflow/py/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from . import hardware, bundle
from .hardware import *
from .bundle import *
from .bundle import *
from .model import *
from .layers import *
21 changes: 7 additions & 14 deletions deepsocflow/py/layers.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,13 @@
from qkeras import QActivation
from tensorflow.keras.layers import Input, Flatten, Add, MaxPooling2D
from tensorflow.keras.layers import Layer, Input, Flatten, Add, MaxPooling2D
import numpy as np

# class QInput(Input):
# def __init__(self, shape, batch_size, hw, frac_bits, name=None):

# self.hw = hw
# self.input_frac_bits = input_frac_bits
# super().__init__(shape=shape, name=name)

# int_bits = hw.X_BITS - self.frac_bits + 1

# x = Input(shape=shape, batch_size=batch_size, name=name)
# x = QActivation(f'quantized_bits(bits={hw.X_BITS}, integer={int_bits}, False,True,1)')(x)

# return x
def QInput(shape, batch_size, hw, int_bits, name=None):
    """Create a Keras input and return it quantized to the hardware input width.

    A Keras ``Input`` is built from ``shape``, ``batch_size`` and ``name``,
    then passed through a ``QActivation`` whose quantizer string is
    ``quantized_bits({hw.X_BITS},{int_bits},False,True,1)``.

    Args:
        shape: Forwarded to ``Input`` as ``shape``.
        batch_size: Forwarded to ``Input`` as ``batch_size``.
        hw: Hardware description; only ``hw.X_BITS`` is read here.
        int_bits: Second argument of the ``quantized_bits`` quantizer string.
        name: Optional name forwarded to ``Input``.

    Returns:
        The quantized tensor, with two extra attributes attached:
        ``raw`` (the un-quantized ``Input`` tensor) and ``hw`` (the ``hw``
        argument, unchanged).
    """
    input_tensor = Input(shape=shape, batch_size=batch_size, name=name)

    # Quantizer spec is built after the Input exists, mirroring the original order.
    quantizer_spec = f'quantized_bits({hw.X_BITS},{int_bits},False,True,1)'
    quantized = QActivation(quantizer_spec)(input_tensor)

    # Attach the raw input and the hardware description to the returned tensor
    # so callers can retrieve them later (e.g. `x_in.raw` as the model input).
    quantized.raw = input_tensor
    quantized.hw = hw
    return quantized



Loading

0 comments on commit 9c4e77e

Please sign in to comment.