Implementation of stride

jvdp1 · jvdp1 · commit 553a55e33914 · 2025-10-31T17:40:51.000+01:00
diff --git a/src/nf/nf_conv1d_layer.f90 b/src/nf/nf_conv1d_layer.f90
@@ -63,7 +63,7 @@ module subroutine init(self, input_shape)
           !! Input layer dimensions
       end subroutine init
   
-      module subroutine forward(self, input)
+      pure module subroutine forward(self, input)
         !! Apply a forward pass on the `conv1d` layer.
         class(conv1d_layer), intent(in out) :: self
           !! A `conv1d_layer` instance
diff --git a/src/nf/nf_conv1d_layer_submodule.f90 b/src/nf/nf_conv1d_layer_submodule.f90
@@ -57,7 +57,7 @@ module subroutine init(self, input_shape)
 
   end subroutine init
 
-  module subroutine forward(self, input)
+  pure module subroutine forward(self, input)
     implicit none
     class(conv1d_layer), intent(in out) :: self
     real, intent(in) :: input(:,:)
@@ -125,13 +125,13 @@ pure module subroutine backward(self, input, gradient)
     do n = 1, self % filters
       do j = 1, self % width
         iws = self % stride * (j-1) + 1
-        iwe = max(iws + self % kernel_size - 1, input_width)
+        iwe = min(iws + self % kernel_size - 1, input_width)
 
         do k = 1, self % channels
           ! Weight gradient: accumulate contribution from the input window.
-          dw_local(n,k,1:iws-iwe+1) = dw_local(n,k,1:iws-iwe+1) + input(k,iws:iwe) * gdz(n,j)
+          dw_local(n,k,1:iwe-iws+1) = dw_local(n,k,1:iwe-iws+1) + input(k,iws:iwe) * gdz(n,j)
           ! Input gradient: propagate gradient back to the input window.
-          self % gradient(k,iws:iwe) = self % gradient(k,iws:iwe) + self % kernel(n,k,1:iws-iwe+1) * gdz(n,j)
+          self % gradient(k,iws:iwe) = self % gradient(k,iws:iwe) + self % kernel(n,k,1:iwe-iws+1) * gdz(n,j)
         end do
       end do
     end do
diff --git a/src/nf/nf_conv2d_layer_submodule.f90 b/src/nf/nf_conv2d_layer_submodule.f90
@@ -30,8 +30,12 @@ module subroutine init(self, input_shape)
     integer, intent(in) :: input_shape(:)
 
     self % channels = input_shape(1)
-    self % width = (input_shape(2) - self % kernel_size + 1) / self % stride(1)
-    self % height = (input_shape(3) - self % kernel_size + 1) / self % stride(2)
+
+    self % width = (input_shape(2) - self % kernel_size) / self % stride(1) + 1
+    if (mod(input_shape(2) - self % kernel_size , self % stride(1)) /= 0) self % width = self % width + 1
+
+    self % height = (input_shape(3) - self % kernel_size) / self % stride(2) + 1
+    if (mod(input_shape(3) - self % kernel_size , self % stride(2)) /= 0) self % height = self % height + 1
 
     ! Output of shape filters x width x height
     allocate(self % output(self % filters, self % width, self % height))
@@ -89,22 +93,24 @@ pure module subroutine forward(self, input)
     iend = input_width - istart + 1
     jend = input_height - jstart + 1
 
-    convolution: do concurrent(i = istart:iend, j = jstart:jend)
+!    convolution: do concurrent(i = istart:iend, j = jstart:jend)
+    convolution: do concurrent(i = 1:self % width, j = 1:self%height)
 
       ! Start and end indices of the input data on the filter window
       ! iws and jws are also coincidentally the indices of the output matrix
-      iws = i - half_window ! TODO kernel_width
-      iwe = i + half_window ! TODO kernel_width
-      jws = j - half_window ! TODO kernel_height
-      jwe = j + half_window ! TODO kernel_height
+      iws = istart + self %stride(1) * (i-1) - half_window ! TODO kernel_width
+      iwe = min(iws + 2*half_window, input_width)          ! TODO kernel_width
+
+      jws = jstart + self %stride(2) * (j-1) - half_window ! TODO kernel_height
+      jwe = min(jws + 2*half_window, input_height)         ! TODO kernel_height
 
       ! Compute the inner tensor product, sum(w_ij * x_ij), for each filter.
       do concurrent(n = 1:self % filters)
-        self % z(n,iws,jws) = sum(self % kernel(n,:,:,:) * input(:,iws:iwe,jws:jwe))
+        self % z(n,i,j) = sum(self % kernel(n,:,1:iwe-iws+1,1:jwe-jws+1) * input(:,iws:iwe,jws:jwe))
       end do
 
       ! Add bias to the inner product.
-      self % z(:,iws,jws) = self % z(:,iws,jws) + self % biases
+      self % z(:,i,j) = self % z(:,i,j) + self % biases
 
     end do convolution
 
@@ -160,21 +166,28 @@ pure module subroutine backward(self, input, gradient)
     do concurrent( &
       n = 1:self % filters, &
       k = 1:self % channels, &
-      i = istart:iend, &
-      j = jstart:jend &
+      i = 1:self % width, &
+      j = 1:self % height &
+      !i = istart:iend, &
+      !j = jstart:jend &
     )
       ! Start and end indices of the input data on the filter window
-      iws = i - half_window ! TODO kernel_width
-      iwe = i + half_window ! TODO kernel_width
-      jws = j - half_window ! TODO kernel_height
-      jwe = j + half_window ! TODO kernel_height
+      !iws = i - half_window ! TODO kernel_width
+      !iwe = i + half_window ! TODO kernel_width
+      !jws = j - half_window ! TODO kernel_height
+      !jwe = j + half_window ! TODO kernel_height
+      iws = istart + self %stride(1) * (i-1) - half_window ! TODO kernel_width
+      iwe = min(iws + 2*half_window, input_width)          ! TODO kernel_width
+
+      jws = jstart + self %stride(2) * (j-1) - half_window ! TODO kernel_height
+      jwe = min(jws + 2*half_window, input_height)         ! TODO kernel_height
 
       ! dL/dw = sum(dL/dy * sigma'(z) * x)
       dw(n,k,:,:) = dw(n,k,:,:) + input(k,iws:iwe,jws:jwe) * gdz(n,iws:iwe,jws:jwe)
 
       ! dL/dx = dL/dy * sigma'(z) .inner. w
-      self % gradient(k,i,j) = self % gradient(k,i,j) &
-        + sum(gdz(n,iws:iwe,jws:jwe) * self % kernel(n,k,:,:))
+      self % gradient(k,iws:iwe,jws:jwe) = self % gradient(k,iws:iwe,jws:jwe) &
+        + gdz(n,iws:iwe,jws:jwe) * self % kernel(n,k,1:iwe-iws+1,1:jwe-jws+1)
 
     end do
 
diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90
@@ -67,7 +67,7 @@ module function conv2d(filters, kernel_width, kernel_height, activation, stride)
     integer, intent(in), optional :: stride(:)
     type(layer) :: res
 
-    integer :: stride_tmp(2)
+    integer, allocatable :: stride_tmp(:)
     class(activation_function), allocatable :: activation_tmp
 
     ! Enforce kernel_width == kernel_height for now;
@@ -76,12 +76,6 @@ module function conv2d(filters, kernel_width, kernel_height, activation, stride)
     if (kernel_width /= kernel_height) &
       error stop 'kernel_width must equal kernel_height in a conv2d layer'
 
-    if (size(stride) /= 2 ) &
-      error stop 'size of stride must be equal to 2 in a conv2d layer'
-
-    if (stride(1) < 1 .or. stride(2) < 1) &
-      error stop 'stride must be >= 1 in a conv2d layer'
-
     res % name = 'conv2d'
 
     if (present(activation)) then
@@ -98,9 +92,15 @@ module function conv2d(filters, kernel_width, kernel_height, activation, stride)
       stride_tmp = [1, 1]
     endif
 
+    if (size(stride_tmp) /= 2 ) &
+      error stop 'size of stride must be equal to 2 in a conv2d layer'
+
+    if (stride_tmp(1) < 1 .or. stride_tmp(2) < 1) &
+      error stop 'stride must be >= 1 in a conv2d layer'
+
     allocate( &
       res % p, &
-      source=conv2d_layer(filters, kernel_width, activation_tmp, stride) &
+      source=conv2d_layer(filters, kernel_width, activation_tmp, stride_tmp) &
     )
 
   end function conv2d
diff --git a/test/test_conv2d_layer.f90 b/test/test_conv2d_layer.f90
@@ -59,6 +59,30 @@ program test_conv2d_layer
     call this_layer % set(sample_input)
   end select
 
+  deallocate(sample_input)
+
+  call conv_layer % forward(input_layer)
+  call conv_layer % get_output(output)
+
+  if (.not. all(abs(output) < tolerance)) then
+    ok = .false.
+    write(stderr, '(a)') 'conv2d layer with zero input and sigmoid function must forward to all 0.5.. failed'
+  end if
+
+  ! Minimal conv2d layer: 1 channel, 17x17 pixel image, stride=[3, 4];
+  allocate(sample_input(1, 17, 17))
+  sample_input = 0
+
+  input_layer = input(1, 17, 17)
+  conv_layer = conv(filters, kernel_size, kernel_size, stride=[3, 4])
+  call conv_layer % init(input_layer)
+
+  select type(this_layer => input_layer % p); type is(input3d_layer)
+    call this_layer % set(sample_input)
+  end select
+
+  deallocate(sample_input)
+
   call conv_layer % forward(input_layer)
   call conv_layer % get_output(output)
 
@@ -67,6 +91,7 @@ program test_conv2d_layer
     write(stderr, '(a)') 'conv2d layer with zero input and sigmoid function must forward to all 0.5.. failed'
   end if
 
+  ! Summary
   if (ok) then
     print '(a)', 'test_conv2d_layer: All tests passed.'
   else