@@ -7,6 +7,11 @@ module nf_self_attention_layer
   implicit none
 
   type, extends(multihead_attention_layer) :: self_attention_layer
+    !! Self Attention Layer
+    !! Source:
+    !! Parikh, A. P., Taeckstroem, O., Das, D., & Uszkoreit, J. (2016)
+    !! A decomposable attention model for natural language inference.
+    !! https://arxiv.org/pdf/1606.01933
     real, allocatable :: gradient(:, :)
   contains
     procedure :: forward
@@ -50,6 +55,8 @@ module function self_attention_layer_cons(sequence_length, model_dimension, n_he
   end function self_attention_layer_cons
 
   module subroutine backward(self, input, gradient)
+    !! Self Attention back propagation
+    !! Returns sum of Query, Key and Value gradients
     class(self_attention_layer), intent(in out) :: self
     real, intent(in) :: input(:, :)
     real, intent(in) :: gradient(:, :)
@@ -62,6 +69,9 @@ module subroutine backward(self, input, gradient)
   end subroutine backward
 
   module subroutine forward(self, input)
+    !! Self Attention forward propagation
+    !! Passes input three times into MultiHead Attention
+    !! Input Shape: (sequence_length, model_dimension)
     class(self_attention_layer), intent(in out) :: self
     real, intent(in) :: input(:, :)
 
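The forward body itself is not part of this diff. Below is a minimal sketch of what the doc comment describes, assuming the parent multihead_attention_layer exposes a common_forward(query, key, value) procedure; that name and signature are assumptions, not taken from the diff. Self attention simply routes the same input into all three slots.

  ! Sketch only: common_forward and its (query, key, value) argument order
  ! are assumed from the parent multihead_attention_layer.
  module subroutine forward(self, input)
    !! Self Attention forward propagation
    !! Passes input three times into MultiHead Attention
    !! Input Shape: (sequence_length, model_dimension)
    class(self_attention_layer), intent(in out) :: self
    real, intent(in) :: input(:, :)

    ! Self attention: the same input serves as query, key and value.
    call self % common_forward(input, input, input)
  end subroutine forward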
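Likewise, a hedged sketch of the backward body: because forward fans the same input out to the query, key and value projections, the layer's input gradient is the sum of the gradients coming back through the three projections, which is what "Returns sum of Query, Key and Value gradients" refers to. The common_backward call and the query_layer/key_layer/value_layer component names are assumptions about the parent type, not shown in this diff.

  ! Sketch only: common_backward and the per-projection sub-layers are
  ! assumed components of multihead_attention_layer.
  module subroutine backward(self, input, gradient)
    !! Self Attention back propagation
    !! Returns sum of Query, Key and Value gradients
    class(self_attention_layer), intent(in out) :: self
    real, intent(in) :: input(:, :)
    real, intent(in) :: gradient(:, :)

    call self % common_backward(input, gradient)
    ! One input was fanned out to three projections in forward(), so its
    ! gradient is the sum of the gradients from each projection.
    self % gradient = self % query_layer % gradient &
                    + self % key_layer % gradient &
                    + self % value_layer % gradient
  end subroutine backward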