Commit a99d800

Accumulate the gradient in dropout % backward and flush in network % update

1 parent aa19f69

2 files changed: +6 -11 lines changed
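
For context, this commit applies the accumulate-then-flush convention that update already uses for conv2d's dw and db: each backward call adds its contribution into a stored gradient, and network % update zeroes the accumulator once the optimizer has consumed it. A minimal sketch of the pattern, assuming hypothetical type and procedure names rather than the library's real interfaces:

! Accumulate-and-flush sketch; names here are illustrative only.
module accumulate_demo
  implicit none

  type :: layer_t
    real :: gradient = 0.0
  end type layer_t

contains

  pure subroutine backward(self, upstream)
    type(layer_t), intent(in out) :: self
    real, intent(in) :: upstream
    ! Add this call's contribution instead of overwriting,
    ! so gradients sum over all samples in a batch.
    self % gradient = self % gradient + upstream
  end subroutine backward

  subroutine update(self)
    type(layer_t), intent(in out) :: self
    ! An optimizer step would consume the accumulated gradient here;
    ! flushing it lets the next batch start from zero.
    self % gradient = 0.0
  end subroutine update

end module accumulate_demo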

src/nf/nf_dropout_layer_submodule.f90

Lines changed: 4 additions & 3 deletions
@@ -47,7 +47,8 @@ module subroutine forward(self, input)
     call shuffle(self % mask)
 
     ! Scale factor to preserve the input sum
-    self % scale = sum(input) / sum(input * self % mask)
+    self % scale = sum(input) / sum(input * self % mask) ! input conservative approach
+    !self % scale = 1 / (1 - self % dropout_rate) ! reference approach
 
     ! Apply dropout mask
     self % output = input * self % mask * self % scale
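
The retained line is the input-conservative choice: it rescales the surviving activations so that sum(output) equals sum(input) exactly on every forward pass, at the cost of a data-dependent scale. The commented-out reference approach is standard inverted dropout; assuming each mask entry is Bernoulli with keep probability 1 - dropout_rate, E[sum(input * self % mask)] = (1 - dropout_rate) * sum(input), so the fixed scale 1 / (1 - self % dropout_rate) preserves the sum in expectation only.
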
@@ -68,10 +69,10 @@ pure module subroutine backward(self, input, gradient)
 
     if (self % training) then
       ! Backpropagate gradient through dropout mask
-      self % gradient = gradient * self % mask * self % scale
+      self % gradient = self % gradient + gradient * self % mask * self % scale
     else
      ! In inference mode, pass through the gradient unchanged
-      self % gradient = gradient
+      self % gradient = self % gradient + gradient
     end if
 
   end subroutine backward
 
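With accumulation, self % gradient sums the contributions from every backward call in a batch, where the old assignment kept only the most recent one; the accumulator is flushed in network % update, in the next file below.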

src/nf/nf_network_submodule.f90

Lines changed: 2 additions & 8 deletions
@@ -543,14 +543,6 @@ module subroutine train(self, input_data, output_data, batch_size, &
       self % loss = quadratic()
     end if
 
-    ! Set all dropout layers' training mode to true.
-    do n = 2, size(self % layers)
-      select type(this_layer => self % layers(n) % p)
-        type is(dropout_layer)
-          this_layer % training = .true.
-      end select
-    end do
-
     dataset_size = size(output_data, dim=2)
 
     epoch_loop: do n = 1, epochs
@@ -640,6 +632,8 @@ module subroutine update(self, optimizer, batch_size)
         type is(conv2d_layer)
          this_layer % dw = 0
          this_layer % db = 0
+        type is(dropout_layer)
+          this_layer % gradient = 0
       end select
     end do
 
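Zeroing this_layer % gradient after the optimizer step mirrors the existing reset of conv2d's dw and db: each batch starts from a clean accumulator. Without this flush, the gradient accumulated in dropout % backward would carry over and grow across batches.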
