Allow kernel_size < stride in io_stream routines
vloncar authored and jmduarte committed Apr 18, 2021
1 parent 9eade88 commit 8ad3bec
Showing 5 changed files with 49 additions and 7 deletions.
1 change: 0 additions & 1 deletion hls4ml/templates/vivado/nnet_utils/nnet_conv1d_stream.h
@@ -30,7 +30,6 @@ void conv_1d_cl(
typename CONFIG_T::bias_t biases[CONFIG_T::n_filt])
{
assert(CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0);
assert(CONFIG_T::stride_width <= CONFIG_T::filt_width);

hls::stream<typename data_T::value_type> data_window[CONFIG_T::filt_width * CONFIG_T::n_chan];
const int win_depth = CONFIG_T::out_width;
35 changes: 34 additions & 1 deletion hls4ml/templates/vivado/nnet_utils/nnet_conv_stream.h
@@ -7,7 +7,7 @@
namespace nnet {

template<unsigned K, unsigned S, unsigned W>
unsigned scale_index(const unsigned idx) {
unsigned scale_index_K_gte_S(const unsigned idx) {
#pragma HLS INLINE

if (idx < K - S) {
@@ -28,6 +28,39 @@ unsigned scale_index(const unsigned idx) {
return K - S + (idx - (K - S)) % S;
}

template<unsigned K, unsigned S, unsigned W>
unsigned scale_index_K_lt_S(const unsigned idx) {
#pragma HLS INLINE

if (idx < S - K) {
return idx;
}

constexpr unsigned nW = ((W - K) / S) * S + K; // Nearest W without unused pixels on the right
constexpr unsigned sW = (DIV_ROUNDUP(S, K) - 1) * S + K; // Scaled W that behaves like original W
if (idx >= nW) {
return sW;
}

const unsigned r = nW - idx;
if (r <= S - K) {
return sW - r;
}

return S - K + (idx - (S - K)) % S;
}

template<unsigned K, unsigned S, unsigned W>
unsigned scale_index(const unsigned idx) {
#pragma HLS INLINE

if (K >= S) {
return scale_index_K_gte_S<K, S, W>(idx);
} else {
return scale_index_K_lt_S<K, S, W>(idx);
}
}

template<class data_T, class res_T, typename CONFIG_T>
void mult_buffer(
hls::stream<typename data_T::value_type> data_window[CONFIG_T::kernel_size * CONFIG_T::n_chan],
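
The new scale_index_K_lt_S folds every input column index onto an index of the minimal-width input for which the per-pixel instructions are precomputed (see compute_conv1d_instructions below), so the streaming convolution can reuse those instructions when the kernel is narrower than the stride. The following host-side sketch reproduces that mapping: the function body is copied from the hunk above, while the DIV_ROUNDUP definition is assumed to match the usual integer ceiling-division macro from nnet_common.h, and the main driver with K = 2, S = 3, W = 8 is purely illustrative.

#include <iostream>

// Assumed to mirror the DIV_ROUNDUP macro from nnet_common.h: integer ceiling division.
#define DIV_ROUNDUP(n, d) (((n) + (d) - 1) / (d))

// Body copied from the new scale_index_K_lt_S above, with the HLS pragma removed.
template<unsigned K, unsigned S, unsigned W>
unsigned scale_index_K_lt_S(const unsigned idx) {
    if (idx < S - K) {
        return idx;
    }

    constexpr unsigned nW = ((W - K) / S) * S + K;           // Nearest W without unused pixels on the right
    constexpr unsigned sW = (DIV_ROUNDUP(S, K) - 1) * S + K; // Scaled W that behaves like original W
    if (idx >= nW) {
        return sW;
    }

    const unsigned r = nW - idx;
    if (r <= S - K) {
        return sW - r;
    }

    return S - K + (idx - (S - K)) % S;
}

int main() {
    // filt_width K = 2, stride S = 3, in_width W = 8: every column index of the
    // width-8 input folds onto an index of the minimal width-5 input (sW = 5).
    for (unsigned idx = 0; idx < 8; idx++) {
        std::cout << idx << " -> " << scale_index_K_lt_S<2, 3, 8>(idx) << "\n";
    }
    // Output: 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 3, 4 -> 1, 5 -> 2, 6 -> 3, 7 -> 4
    return 0;
}

Because K, S, and W are template parameters, the if (K >= S) test in the new scale_index dispatcher is resolved at compile time, so existing callers of scale_index<K, S, W> need no changes.
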
1 change: 0 additions & 1 deletion hls4ml/templates/vivado/nnet_utils/nnet_sepconv1d_stream.h
@@ -16,7 +16,6 @@ void depthwise_conv_1d_cl(
typename CONFIG_T::bias_t biases[CONFIG_T::n_chan])
{
assert(CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0);
assert(CONFIG_T::stride_width <= CONFIG_T::filt_width);

hls::stream<typename data_T::value_type> data_window[CONFIG_T::filt_width * CONFIG_T::n_chan];
const int win_depth = CONFIG_T::out_width;
1 change: 0 additions & 1 deletion hls4ml/templates/vivado/nnet_utils/nnet_sepconv2d_stream.h
@@ -17,7 +17,6 @@ void depthwise_conv_2d_cl(
{
assert(CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0 && CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0);
assert(CONFIG_T::filt_height == CONFIG_T::filt_width);
assert(CONFIG_T::stride_height <= CONFIG_T::filt_height && CONFIG_T::stride_width <= CONFIG_T::filt_width);

hls::stream<typename data_T::value_type> data_window[CONFIG_T::filt_height * CONFIG_T::filt_width * CONFIG_T::n_chan];
const int win_depth = CONFIG_T::filt_height * CONFIG_T::out_width;
18 changes: 15 additions & 3 deletions hls4ml/templates/vivado_template.py
@@ -525,7 +525,11 @@ def compute_conv1d_instructions(self, in_W, in_C, kernel_size=3, stride=1, pad=0
# Current limitations
assert pad == 0

min_W = (math.ceil(kernel_size / stride) - 1) * stride + kernel_size
if kernel_size >= stride:
min_W = (math.ceil(kernel_size / stride) - 1) * stride + kernel_size
else:
min_W = (math.ceil(stride / kernel_size) - 1) * stride + kernel_size

min_oW = int((min_W - kernel_size) // stride + 1)

out_W = int((in_W - kernel_size) // stride + 1)
@@ -569,8 +573,16 @@ def compute_conv2d_instructions(self, in_H, in_W, in_C, kernel_size=3, stride=1,
assert stride_height == stride_width
assert pad == 0

min_H = (math.ceil(kernel_height / stride_height) - 1) * stride_height + kernel_height
min_W = (math.ceil(kernel_width / stride_width) - 1) * stride_width + kernel_width
if kernel_height >= stride_height:
min_H = (math.ceil(kernel_height / stride_height) - 1) * stride_height + kernel_height
else:
min_H = (math.ceil(stride_height / kernel_height) - 1) * stride_height + kernel_height

if kernel_width >= stride_width:
min_W = (math.ceil(kernel_width / stride_width) - 1) * stride_width + kernel_width
else:
min_W = (math.ceil(stride_width / kernel_width) - 1) * stride_width + kernel_width

min_oH = int((min_H - kernel_height) // stride_height + 1)
min_oW = int((min_W - kernel_width) // stride_width + 1)

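Both branches of the new min_W (and min_H) computation amount to (ceil(max(K, S) / min(K, S)) - 1) * S + K, the smallest input size whose pixel pattern generalizes to larger inputs. Below is a minimal sketch condensing the two branches into one helper; the min_width name and the sample kernel/stride values are illustrative assumptions, not part of the commit.

#include <iostream>

// Hypothetical helper condensing the two min_W branches of compute_conv1d_instructions.
unsigned min_width(unsigned kernel_size, unsigned stride) {
    // Integer ceiling division, matching math.ceil(a / b) for positive integers.
    auto div_roundup = [](unsigned a, unsigned b) { return (a + b - 1) / b; };
    if (kernel_size >= stride) {
        return (div_roundup(kernel_size, stride) - 1) * stride + kernel_size;
    }
    return (div_roundup(stride, kernel_size) - 1) * stride + kernel_size;
}

int main() {
    // kernel_size >= stride (previously the only supported case): K = 3, S = 1 -> min_W = 5
    std::cout << min_width(3, 1) << "\n";
    // kernel_size < stride (newly allowed): K = 2, S = 3 -> min_W = 5, the same value as
    // sW in scale_index_K_lt_S<2, 3, W>, so folded indices stay inside the minimal input.
    std::cout << min_width(2, 3) << "\n";
    return 0;
}

As in the Python code above, the minimal output width then follows as min_oW = (min_W - kernel_size) // stride + 1.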
