From a6cf3c0bb12d492e1d0004bb4df88235f99f3daf Mon Sep 17 00:00:00 2001
From: Vladimir Loncar
Date: Fri, 23 Apr 2021 19:50:38 +0200
Subject: [PATCH] Special case for a Conv with a single output pixel

---
 hls4ml/model/optimizer/__init__.py                 |  2 +
 .../optimizer/passes/conv_single_output.py         | 57 +++++++++++++++++++
 .../vivado/nnet_utils/nnet_conv1d_stream.h         | 42 ++++++++++++++
 .../vivado/nnet_utils/nnet_conv2d_stream.h         | 42 ++++++++++++++
 4 files changed, 143 insertions(+)
 create mode 100644 hls4ml/model/optimizer/passes/conv_single_output.py

diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py
index 08c698afe0..080eaf3b5b 100644
--- a/hls4ml/model/optimizer/__init__.py
+++ b/hls4ml/model/optimizer/__init__.py
@@ -10,6 +10,7 @@
 from hls4ml.model.optimizer.passes.conv_same_pad import InsertZeroPaddingBeforeConv1D
 from hls4ml.model.optimizer.passes.conv_same_pad import InsertZeroPaddingBeforeConv2D
 from hls4ml.model.optimizer.passes.pointwise import OptimizePointwiseConv
+from hls4ml.model.optimizer.passes.conv_single_output import OptimizeSingleOutConv
 from hls4ml.model.optimizer.passes.clone import CloneOutput
 from hls4ml.model.optimizer.passes.repack_stream import ReshapeStream, BroadcastStream
 
@@ -29,6 +30,7 @@
 register_pass('conv1d_same_pad', InsertZeroPaddingBeforeConv1D)
 register_pass('conv2d_same_pad', InsertZeroPaddingBeforeConv2D)
 register_pass('optimize_pointwise_conv', OptimizePointwiseConv)
+register_pass('optimize_single_out_conv', OptimizeSingleOutConv)
 register_pass('clone_output', CloneOutput)
 register_pass('reshape_stream', ReshapeStream)
 register_pass('broadcast_stream', BroadcastStream)
diff --git a/hls4ml/model/optimizer/passes/conv_single_output.py b/hls4ml/model/optimizer/passes/conv_single_output.py
new file mode 100644
index 0000000000..94abbd97b6
--- /dev/null
+++ b/hls4ml/model/optimizer/passes/conv_single_output.py
@@ -0,0 +1,57 @@
+import numpy as np
+import re
+
+from hls4ml.model.optimizer import OptimizerPass
+from hls4ml.model.hls_model import Conv1D, Conv2D, register_layer
+from hls4ml.templates import templates
+
+class SingleOutputConv1D(Conv1D):
+    ''' Optimized Conv1D implementation for kernel_size = input_size resulting in single output pixel. '''
+
+    # Nothing to do, will pick up function and config from class name
+    pass
+
+class SingleOutputConv2D(Conv2D):
+    ''' Optimized Conv2D implementation for kernel_size = input_size resulting in single output pixel. '''
+
+    # Nothing to do, will pick up function and config from class name
+    pass
+
+single_out_conv1d_function_template = 'nnet::single_output_conv_1d_{data_format}<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {b});'
+single_out_conv2d_function_template = 'nnet::single_output_conv_2d_{data_format}<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {b});'
+
+single_out_conv1d_include_list = ['nnet_utils/nnet_conv1d.h', 'nnet_utils/nnet_conv1d_stream.h']
+single_out_conv2d_include_list = ['nnet_utils/nnet_conv2d.h', 'nnet_utils/nnet_conv2d_stream.h']
+
+# Register the layer types to the layer map
+register_layer('SingleOutputConv1D', SingleOutputConv1D)
+register_layer('SingleOutputConv2D', SingleOutputConv2D)
+
+# Register the templates for config and function
+templates.get_backend('Vivado').register_templates(
+    'SingleOutputConv1D',
+    single_out_conv1d_function_template,
+    templates.get_backend('Vivado').get_config_template('Conv1D'),
+    single_out_conv1d_include_list
+)
+
+templates.get_backend('Vivado').register_templates(
+    'SingleOutputConv2D',
+    single_out_conv2d_function_template,
+    templates.get_backend('Vivado').get_config_template('Conv2D'),
+    single_out_conv2d_include_list
+)
+
+class OptimizeSingleOutConv(OptimizerPass):
+    def match(self, node):
+        return node.__class__.__name__ in ['Conv1D', 'Conv2D'] and \
+            node.get_attr('filt_height', 1) == node.get_attr('in_height', 1) and \
+            node.get_attr('filt_width') == node.get_attr('in_width') and \
+            node.get_attr('out_height', 1) == 1
and node.get_attr('out_width') == 1
+
+    def transform(self, model, node):
+        dim = node.__class__.__name__[-2:] # '1D' or '2D'
+        pw_node = model.make_node('SingleOutputConv' + dim, node.name, node.attributes.copy(), node.inputs.copy())
+        model.replace_node(node, pw_node)
+
+        return True
diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_conv1d_stream.h b/hls4ml/templates/vivado/nnet_utils/nnet_conv1d_stream.h
index 1a5b0d0dfd..c8230a8bff 100644
--- a/hls4ml/templates/vivado/nnet_utils/nnet_conv1d_stream.h
+++ b/hls4ml/templates/vivado/nnet_utils/nnet_conv1d_stream.h
@@ -56,5 +56,47 @@ void conv_1d_cl(
     }
 }
 
+template<class data_T, class res_T, typename CONFIG_T>
+void single_output_conv_1d_cl(
+    hls::stream<data_T> &data,
+    hls::stream<res_T> &res,
+    typename CONFIG_T::weight_t weights[CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt],
+    typename CONFIG_T::bias_t biases[CONFIG_T::n_filt])
+{
+    assert(CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0);
+    assert(CONFIG_T::filt_width == CONFIG_T::in_width);
+    assert(CONFIG_T::out_width == 1);
+
+    typename data_T::value_type data_window[CONFIG_T::filt_width * CONFIG_T::n_chan];
+    #pragma HLS ARRAY_PARTITION variable=data_window complete
+    typename res_T::value_type res_elem[CONFIG_T::n_filt];
+    #pragma HLS ARRAY_PARTITION variable=res_elem complete
+    res_T res_pack;
+    #pragma HLS DATA_PACK variable=res_pack
+
+    ReadInputImage: for (unsigned i = 0; i < CONFIG_T::in_width; i++) {
+        #pragma HLS PIPELINE
+        data_T in_elem = data.read();
+        CopyDataChan: for (unsigned c = 0; c < CONFIG_T::n_chan; c++) {
+            #pragma HLS UNROLL
+            data_window[i * CONFIG_T::n_chan + c] = in_elem[c];
+        }
+    }
+
+    #pragma HLS INLINE region
+    if (CONFIG_T::strategy == nnet::latency) {
+        dense_latency<typename data_T::value_type, typename res_T::value_type, typename CONFIG_T::mult_config>(data_window, res_elem, weights, biases);
+    } else {
+        dense_resource<typename data_T::value_type, typename res_T::value_type, typename CONFIG_T::mult_config>(data_window, res_elem, weights, biases);
+    }
+
+    CastLoop: for (unsigned jj = 0; jj < CONFIG_T::n_filt; jj++) {
+        #pragma HLS UNROLL
+        res_pack[jj] = res_elem[jj];
+    }
+
+    res.write(res_pack);
+}
+
 }
 #endif
diff --git
a/hls4ml/templates/vivado/nnet_utils/nnet_conv2d_stream.h b/hls4ml/templates/vivado/nnet_utils/nnet_conv2d_stream.h
index 1822025cd4..d5d718b5bc 100644
--- a/hls4ml/templates/vivado/nnet_utils/nnet_conv2d_stream.h
+++ b/hls4ml/templates/vivado/nnet_utils/nnet_conv2d_stream.h
@@ -61,5 +61,47 @@ void conv_2d_cl(
     }
 }
 
+template<class data_T, class res_T, typename CONFIG_T>
+void single_output_conv_2d_cl(
+    hls::stream<data_T> &data,
+    hls::stream<res_T> &res,
+    typename CONFIG_T::weight_t weights[CONFIG_T::filt_height * CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt],
+    typename CONFIG_T::bias_t biases[CONFIG_T::n_filt])
+{
+    assert(CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0 && CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0);
+    assert(CONFIG_T::filt_height == CONFIG_T::in_height && CONFIG_T::filt_width == CONFIG_T::in_width);
+    assert(CONFIG_T::out_height == 1 && CONFIG_T::out_width == 1);
+
+    typename data_T::value_type data_window[CONFIG_T::filt_height * CONFIG_T::filt_width * CONFIG_T::n_chan];
+    #pragma HLS ARRAY_PARTITION variable=data_window complete
+    typename res_T::value_type res_elem[CONFIG_T::n_filt];
+    #pragma HLS ARRAY_PARTITION variable=res_elem complete
+    res_T res_pack;
+    #pragma HLS DATA_PACK variable=res_pack
+
+    ReadInputImage: for (unsigned i = 0; i < CONFIG_T::in_height * CONFIG_T::in_width; i++) {
+        #pragma HLS PIPELINE
+        data_T in_elem = data.read();
+        CopyDataChan: for (unsigned c = 0; c < CONFIG_T::n_chan; c++) {
+            #pragma HLS UNROLL
+            data_window[i * CONFIG_T::n_chan + c] = in_elem[c];
+        }
+    }
+
+    #pragma HLS INLINE region
+    if (CONFIG_T::strategy == nnet::latency) {
+        dense_latency<typename data_T::value_type, typename res_T::value_type, typename CONFIG_T::mult_config>(data_window, res_elem, weights, biases);
+    } else {
+        dense_resource<typename data_T::value_type, typename res_T::value_type, typename CONFIG_T::mult_config>(data_window, res_elem, weights, biases);
+    }
+
+    CastLoop: for (unsigned jj = 0; jj < CONFIG_T::n_filt; jj++) {
+        #pragma HLS UNROLL
+        res_pack[jj] = res_elem[jj];
+    }
+
+    res.write(res_pack);
+}
+
 }
 #endif