Commit

Special case for a Conv with a single output pixel
vloncar authored and jmduarte committed Apr 23, 2021
1 parent bafb268 commit a6cf3c0
Showing 4 changed files with 143 additions and 0 deletions.
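
For context: this commit targets convolutions whose kernel spans the entire (unpadded) input, so the layer produces exactly one output pixel per filter. A minimal, hypothetical Keras model that would hit this new path (the layer sizes below are illustrative, not taken from this commit):

# Hypothetical example of a model this optimization applies to:
# an 8x8 kernel over an 8x8 input with 'valid' padding yields a 1x1xN output.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D

model = Sequential([
    Conv2D(16, kernel_size=(8, 8), padding='valid', input_shape=(8, 8, 3))
])
model.summary()  # output shape: (None, 1, 1, 16)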
2 changes: 2 additions & 0 deletions hls4ml/model/optimizer/__init__.py
@@ -10,6 +10,7 @@
from hls4ml.model.optimizer.passes.conv_same_pad import InsertZeroPaddingBeforeConv1D
from hls4ml.model.optimizer.passes.conv_same_pad import InsertZeroPaddingBeforeConv2D
from hls4ml.model.optimizer.passes.pointwise import OptimizePointwiseConv
from hls4ml.model.optimizer.passes.conv_single_output import OptimizeSingleOutConv
from hls4ml.model.optimizer.passes.clone import CloneOutput
from hls4ml.model.optimizer.passes.repack_stream import ReshapeStream, BroadcastStream

@@ -29,6 +30,7 @@
register_pass('conv1d_same_pad', InsertZeroPaddingBeforeConv1D)
register_pass('conv2d_same_pad', InsertZeroPaddingBeforeConv2D)
register_pass('optimize_pointwise_conv', OptimizePointwiseConv)
register_pass('optimize_single_out_conv', OptimizeSingleOutConv)
register_pass('clone_output', CloneOutput)
register_pass('reshape_stream', ReshapeStream)
register_pass('broadcast_stream', BroadcastStream)
57 changes: 57 additions & 0 deletions hls4ml/model/optimizer/passes/conv_single_output.py
@@ -0,0 +1,57 @@
import numpy as np
import re

from hls4ml.model.optimizer import OptimizerPass
from hls4ml.model.hls_model import Conv1D, Conv2D, register_layer
from hls4ml.templates import templates

class SingleOutputConv1D(Conv1D):
    ''' Optimized Conv1D implementation for kernel_size = input_size resulting in a single output pixel. '''

    # Nothing to do, will pick up function and config from class name
    pass

class SingleOutputConv2D(Conv2D):
    ''' Optimized Conv2D implementation for kernel_size = input_size resulting in a single output pixel. '''

    # Nothing to do, will pick up function and config from class name
    pass

single_out_conv1d_function_template = 'nnet::single_output_conv_1d_{data_format}<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {b});'
single_out_conv2d_function_template = 'nnet::single_output_conv_2d_{data_format}<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {b});'

single_out_conv1d_include_list = ['nnet_utils/nnet_conv1d.h', 'nnet_utils/nnet_conv1d_stream.h']
single_out_conv2d_include_list = ['nnet_utils/nnet_conv2d.h', 'nnet_utils/nnet_conv2d_stream.h']

# Register the layer types to the layer map
register_layer('SingleOutputConv1D', SingleOutputConv1D)
register_layer('SingleOutputConv2D', SingleOutputConv2D)

# Register the templates for config and function
templates.get_backend('Vivado').register_templates(
    'SingleOutputConv1D',
    single_out_conv1d_function_template,
    templates.get_backend('Vivado').get_config_template('Conv1D'),
    single_out_conv1d_include_list
)

templates.get_backend('Vivado').register_templates(
    'SingleOutputConv2D',
    single_out_conv2d_function_template,
    templates.get_backend('Vivado').get_config_template('Conv2D'),
    single_out_conv2d_include_list
)

class OptimizeSingleOutConv(OptimizerPass):
    def match(self, node):
        # Kernel covers the whole input and the output is a single pixel
        return node.__class__.__name__ in ['Conv1D', 'Conv2D'] and \
            node.get_attr('filt_height', 1) == node.get_attr('in_height', 1) and \
            node.get_attr('filt_width') == node.get_attr('in_width') and \
            node.get_attr('out_height', 1) == 1 and node.get_attr('out_width') == 1

    def transform(self, model, node):
        dim = node.__class__.__name__[-2:]  # '1D' or '2D'
        new_node = model.make_node('SingleOutputConv' + dim, node.name, node.attributes.copy(), node.inputs.copy())
        model.replace_node(node, new_node)

        return True
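
The match condition is the standard convolution output-size formula specialized to this case: with zero padding, unit stride and a kernel as wide (and tall) as the input, the output collapses to a single pixel. A quick sanity check of that arithmetic (standard formula, not code from this commit):

# Standard conv output size: out = (in + pad_total - filt) // stride + 1
def conv_out_size(in_size, filt_size, pad_total=0, stride=1):
    return (in_size + pad_total - filt_size) // stride + 1

print(conv_out_size(in_size=8, filt_size=8))    # -> 1
print(conv_out_size(in_size=28, filt_size=28))  # -> 1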
42 changes: 42 additions & 0 deletions hls4ml/templates/vivado/nnet_utils/nnet_conv1d_stream.h
@@ -56,5 +56,47 @@ void conv_1d_cl(
}
}

template<class data_T, class res_T, typename CONFIG_T>
void single_output_conv_1d_cl(
    hls::stream<data_T> &data,
    hls::stream<res_T>  &res,
    typename CONFIG_T::weight_t weights[CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt],
    typename CONFIG_T::bias_t   biases[CONFIG_T::n_filt])
{
    // Only valid when the kernel covers the whole (unpadded) input
    assert(CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0);
    assert(CONFIG_T::filt_width == CONFIG_T::in_width);
    assert(CONFIG_T::out_width == 1);

    typename data_T::value_type data_window[CONFIG_T::filt_width * CONFIG_T::n_chan];
    #pragma HLS ARRAY_PARTITION variable=data_window complete
    typename res_T::value_type res_elem[CONFIG_T::n_filt];
    #pragma HLS ARRAY_PARTITION variable=res_elem complete
    res_T res_pack;
    #pragma HLS DATA_PACK variable=res_pack

    // Buffer the entire input as one flat window
    ReadInputImage: for (unsigned i = 0; i < CONFIG_T::in_width; i++) {
        #pragma HLS PIPELINE
        data_T in_elem = data.read();
        CopyDataChan: for (unsigned c = 0; c < CONFIG_T::n_chan; c++) {
            #pragma HLS UNROLL
            data_window[i * CONFIG_T::n_chan + c] = in_elem[c];
        }
    }

    // With a single output pixel, the convolution reduces to a dense layer over the window
    #pragma HLS INLINE region
    if (CONFIG_T::strategy == nnet::latency) {
        dense_latency<typename data_T::value_type, typename res_T::value_type, typename CONFIG_T::mult_config>(data_window, res_elem, weights, biases);
    } else {
        dense_resource<typename data_T::value_type, typename res_T::value_type, typename CONFIG_T::mult_config>(data_window, res_elem, weights, biases);
    }

    CastLoop: for (unsigned jj = 0; jj < CONFIG_T::n_filt; jj++) {
        #pragma HLS UNROLL
        res_pack[jj] = res_elem[jj];
    }

    res.write(res_pack);
}

}
#endif
42 changes: 42 additions & 0 deletions hls4ml/templates/vivado/nnet_utils/nnet_conv2d_stream.h
@@ -61,5 +61,47 @@ void conv_2d_cl(
}
}

template<class data_T, class res_T, typename CONFIG_T>
void single_output_conv_2d_cl(
    hls::stream<data_T> &data,
    hls::stream<res_T>  &res,
    typename CONFIG_T::weight_t weights[CONFIG_T::filt_height * CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt],
    typename CONFIG_T::bias_t   biases[CONFIG_T::n_filt])
{
    // Only valid when the kernel covers the whole (unpadded) input
    assert(CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0 && CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0);
    assert(CONFIG_T::filt_height == CONFIG_T::in_height && CONFIG_T::filt_width == CONFIG_T::in_width);
    assert(CONFIG_T::out_height == 1 && CONFIG_T::out_width == 1);

    typename data_T::value_type data_window[CONFIG_T::filt_height * CONFIG_T::filt_width * CONFIG_T::n_chan];
    #pragma HLS ARRAY_PARTITION variable=data_window complete
    typename res_T::value_type res_elem[CONFIG_T::n_filt];
    #pragma HLS ARRAY_PARTITION variable=res_elem complete
    res_T res_pack;
    #pragma HLS DATA_PACK variable=res_pack

    // Buffer the entire input image as one flat window
    ReadInputImage: for (unsigned i = 0; i < CONFIG_T::in_height * CONFIG_T::in_width; i++) {
        #pragma HLS PIPELINE
        data_T in_elem = data.read();
        CopyDataChan: for (unsigned c = 0; c < CONFIG_T::n_chan; c++) {
            #pragma HLS UNROLL
            data_window[i * CONFIG_T::n_chan + c] = in_elem[c];
        }
    }

    // With a single output pixel, the convolution reduces to a dense layer over the window
    #pragma HLS INLINE region
    if (CONFIG_T::strategy == nnet::latency) {
        dense_latency<typename data_T::value_type, typename res_T::value_type, typename CONFIG_T::mult_config>(data_window, res_elem, weights, biases);
    } else {
        dense_resource<typename data_T::value_type, typename res_T::value_type, typename CONFIG_T::mult_config>(data_window, res_elem, weights, biases);
    }

    CastLoop: for (unsigned jj = 0; jj < CONFIG_T::n_filt; jj++) {
        #pragma HLS UNROLL
        res_pack[jj] = res_elem[jj];
    }

    res.write(res_pack);
}

}
#endif
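
Both new kernels exploit the same identity: once the whole input is buffered, a convolution with a single output position is one dot product per filter, i.e. a dense layer over the flattened window, which is why they dispatch to dense_latency/dense_resource. A NumPy sketch of that equivalence (shapes are illustrative; the exact weight layout hls4ml uses is not shown here):

import numpy as np

# Illustrative shapes: 8-wide input, 3 channels, 16 filters
in_width, n_chan, n_filt = 8, 3, 16
x = np.random.rand(in_width, n_chan)
w = np.random.rand(in_width, n_chan, n_filt)  # kernel spans the full input
b = np.random.rand(n_filt)

# Convolution evaluated at its single valid position: one dot product per filter
conv_out = np.einsum('wc,wcf->f', x, w) + b

# Same result as a dense layer applied to the flattened window
dense_out = x.reshape(-1) @ w.reshape(-1, n_filt) + b

assert np.allclose(conv_out, dense_out)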
