Commit aa4f8f2

make linear2d_layer with batch as last dimension (performance)
1 parent 5b3a4cb commit aa4f8f2
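
Why batch-last helps: Fortran stores arrays in column-major order, so the leftmost subscript varies fastest in memory. With batch_size as the last dimension, each per-sample slice a(:, :, i) is one contiguous block that matmul can read in place, whereas a(i, :, :) is strided and typically forces a copy into a temporary. A minimal standalone sketch (not part of this commit) that checks this with the is_contiguous intrinsic:

program layout_demo
  implicit none
  ! batch first: (batch, seq, features); batch last: (seq, features, batch)
  real :: batch_first(4, 3, 2), batch_last(3, 2, 4)
  ! Column-major storage: batch_last(:, :, 1) occupies the first 3*2 reals,
  ! so the slice is contiguous; batch_first(1, :, :) hops 4 reals at a time.
  print *, is_contiguous(batch_last(:, :, 1))   ! T
  print *, is_contiguous(batch_first(1, :, :))  ! F
end program layout_demo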

File tree

4 files changed: +14 -14

src/nf/nf_layer_constructors.f90
src/nf/nf_layer_constructors_submodule.f90
src/nf/nf_linear2d_layer.f90
src/nf/nf_linear2d_layer_submodule.f90


src/nf/nf_layer_constructors.f90

Lines changed: 1 addition & 1 deletion
@@ -166,7 +166,7 @@ module function reshape(output_shape) result(res)
      !! Resulting layer instance
  end function reshape

- module function linear2d(batch_size, sequence_length, in_features, out_features) result(res)
+ module function linear2d(sequence_length, in_features, out_features, batch_size) result(res)
    integer, intent(in) :: batch_size, sequence_length, in_features, out_features
    type(layer) :: res
  end function linear2d
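
A call site under the new positional order would look like the following (a hedged sketch; only linear2d and type(layer) are taken from this diff, the surrounding context is assumed):

  ! old order: linear2d(batch_size, sequence_length, in_features, out_features)
  ! new order: linear2d(sequence_length, in_features, out_features, batch_size)
  type(layer) :: l
  l = linear2d(sequence_length=16, in_features=32, out_features=64, batch_size=8)

Callers that pass keyword arguments, as above, are unaffected by the reordering; purely positional call sites need updating.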

src/nf/nf_layer_constructors_submodule.f90

Lines changed: 3 additions & 3 deletions
@@ -135,13 +135,13 @@ module function reshape(output_shape) result(res)

  end function reshape

- module function linear2d(batch_size, sequence_length, in_features, out_features) result(res)
+ module function linear2d(sequence_length, in_features, out_features, batch_size) result(res)
    integer, intent(in) :: batch_size, sequence_length, in_features, out_features
    type(layer) :: res

    res % name = 'linear2d'
-   res % layer_shape = [batch_size, sequence_length, out_features]
-   allocate(res % p, source=linear2d_layer(batch_size, sequence_length, in_features, out_features))
+   res % layer_shape = [sequence_length, out_features, batch_size]
+   allocate(res % p, source=linear2d_layer(sequence_length, in_features, out_features, batch_size))

  end function linear2d

end submodule nf_layer_constructors_submodule
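
The reordered layer_shape now mirrors the allocation order used in init further down, so shape queries report (sequence_length, out_features, batch_size). A hypothetical check (assuming layer_shape is reachable at the call site):

  type(layer) :: l
  l = linear2d(sequence_length=16, in_features=32, out_features=64, batch_size=8)
  print '(3i4)', l % layer_shape   ! 16 64 8 under the new ordering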

src/nf/nf_linear2d_layer.f90

Lines changed: 2 additions & 2 deletions
@@ -9,7 +9,7 @@ module nf_linear2d_layer
  public :: linear2d_layer

  type, extends(base_layer) :: linear2d_layer
-   integer :: batch_size, sequence_length, in_features, out_features
+   integer :: sequence_length, in_features, out_features, batch_size

    real, allocatable :: weights(:, :)
    real, allocatable :: biases(:)

@@ -32,7 +32,7 @@ module nf_linear2d_layer

  interface linear2d_layer
    module function linear2d_layer_cons(&
-     batch_size, sequence_length, in_features, out_features&
+     sequence_length, in_features, out_features, batch_size&
    ) result(res)
      integer, intent(in) :: batch_size, sequence_length, in_features, out_features
      type(linear2d_layer) :: res

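Only the argument list changes here; the declaration statement below it can keep the old name order because, in Fortran, the positional order of dummy arguments is fixed by the function statement, not by the order in which the names are declared. A minimal illustration:

  ! positional order is f(a, b) regardless of the declaration order inside
  function f(a, b) result(c)
    integer, intent(in) :: b, a   ! declared "out of order": still f(a, b)
    integer :: c
    c = 10*a + b
  end function f
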
src/nf/nf_linear2d_layer_submodule.f90

Lines changed: 8 additions & 8 deletions
@@ -3,7 +3,7 @@
  implicit none
contains
  module function linear2d_layer_cons(&
-   batch_size, sequence_length, in_features, out_features&
+   sequence_length, in_features, out_features, batch_size&
  ) result(res)
    integer, intent(in) :: batch_size, sequence_length, in_features, out_features
    type(linear2d_layer) :: res

@@ -18,8 +18,8 @@ module subroutine init(self, input_shape)
    class(linear2d_layer), intent(in out) :: self
    integer, intent(in) :: input_shape(:)

-   allocate(self % output(self % batch_size, self % sequence_length, self % out_features))
-   allocate(self % gradient(self % batch_size, self % sequence_length, self % in_features))
+   allocate(self % output(self % sequence_length, self % out_features, self % batch_size))
+   allocate(self % gradient(self % sequence_length, self % in_features, self % batch_size))

    allocate(self % weights(self % in_features, self % out_features))
    self % weights = 0.1

@@ -39,10 +39,10 @@ pure module subroutine forward(self, input)
    integer :: i, j

    do concurrent(i = 1: self % batch_size)
-     self % output(i, :, :) = matmul(input(i, :, :), self % weights)
+     self % output(:, :, i) = matmul(input(:, :, i), self % weights)
    end do
    do concurrent(i = 1: self % batch_size, j = 1: self % sequence_length)
-     self % output(i, j, :) = self % output(i, j, :) + self % biases
+     self % output(j, :, i) = self % output(j, :, i) + self % biases
    end do
  end subroutine forward

@@ -55,9 +55,9 @@ pure module subroutine backward(self, input, gradient)
    integer :: i

    do concurrent(i = 1: self % batch_size)
-     self % dw = self % dw + matmul(transpose(input(i, :, :)), gradient(i, :, :))
-     self % db = self % db + sum(gradient(i, :, :), 1)
-     self % gradient(i, :, :) = matmul(gradient(i, :, :), transpose(self % weights))
+     self % dw = self % dw + matmul(transpose(input(:, :, i)), gradient(:, :, i))
+     self % db = self % db + sum(gradient(:, :, i), 1)
+     self % gradient(:, :, i) = matmul(gradient(:, :, i), transpose(self % weights))
    end do
  end subroutine backward
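
The payoff is in forward and backward: matmul(input(:, :, i), self % weights) now consumes one contiguous slice per iteration instead of a strided one. A hypothetical micro-benchmark sketch of the two layouts (standalone; sizes and names invented for illustration, not taken from the commit):

program batch_dim_bench
  implicit none
  integer, parameter :: batch = 64, seq = 128, feat = 128
  real, allocatable :: a_first(:, :, :), a_last(:, :, :), w(:, :), out(:, :)
  real :: t0, t1, acc
  integer :: i

  allocate(a_first(batch, seq, feat), a_last(seq, feat, batch))
  allocate(w(feat, feat), out(seq, feat))
  call random_number(a_first); call random_number(a_last); call random_number(w)
  acc = 0

  call cpu_time(t0)
  do i = 1, batch
    out = matmul(a_first(i, :, :), w)   ! strided slice: usually copied to a temporary
    acc = acc + out(1, 1)
  end do
  call cpu_time(t1)
  print '("batch first: ", f8.4, " s")', t1 - t0

  call cpu_time(t0)
  do i = 1, batch
    out = matmul(a_last(:, :, i), w)    ! contiguous slice: read in place
    acc = acc + out(1, 1)
  end do
  call cpu_time(t1)
  print '("batch last:  ", f8.4, " s")', t1 - t0

  print '("checksum: ", f0.3)', acc     ! keep the compiler from eliding the loops
end program batch_dim_bench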
