4 changes: 3 additions & 1 deletion src/nf/nf_conv1d_layer.f90
@@ -15,6 +15,7 @@ module nf_conv1d_layer
integer :: channels
integer :: kernel_size
integer :: filters
integer :: stride

real, allocatable :: biases(:) ! size(filters)
real, allocatable :: kernel(:,:,:) ! filters x channels x window
@@ -39,12 +40,13 @@ module nf_conv1d_layer
end type conv1d_layer

interface conv1d_layer
module function conv1d_layer_cons(filters, kernel_size, activation) &
module function conv1d_layer_cons(filters, kernel_size, activation, stride) &
result(res)
!! `conv1d_layer` constructor function
integer, intent(in) :: filters
integer, intent(in) :: kernel_size
class(activation_function), intent(in) :: activation
integer, intent(in) :: stride
type(conv1d_layer) :: res
end function conv1d_layer_cons
end interface conv1d_layer
28 changes: 16 additions & 12 deletions src/nf/nf_conv1d_layer_submodule.f90
@@ -7,15 +7,17 @@

contains

module function conv1d_layer_cons(filters, kernel_size, activation) result(res)
module function conv1d_layer_cons(filters, kernel_size, activation, stride) result(res)
integer, intent(in) :: filters
integer, intent(in) :: kernel_size
class(activation_function), intent(in) :: activation
integer, intent(in) :: stride
type(conv1d_layer) :: res

res % kernel_size = kernel_size
res % filters = filters
res % activation_name = activation % get_name()
res % stride = stride
allocate( res % activation, source = activation )
end function conv1d_layer_cons

@@ -25,7 +27,9 @@ module subroutine init(self, input_shape)
integer, intent(in) :: input_shape(:)

self % channels = input_shape(1)
self % width = input_shape(2) - self % kernel_size + 1
self % width = (input_shape(2) - self % kernel_size) / self % stride + 1

if (mod(input_shape(2) - self % kernel_size, self % stride) /= 0) self % width = self % width + 1

! Output of shape: filters x width
allocate(self % output(self % filters, self % width))
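
Note on the formula above: it computes ceil((input_width - kernel_size) / stride) + 1, so a trailing partial window still yields an output position (the forward pass clips it against input_width). A standalone sketch of the arithmetic, not part of the patch:

program check_output_width
  implicit none
  integer :: input_width, kernel_size, stride, width
  input_width = 17; kernel_size = 3; stride = 3
  ! Integer division truncates, i.e. floor for non-negative operands.
  width = (input_width - kernel_size) / stride + 1
  if (mod(input_width - kernel_size, stride) /= 0) width = width + 1
  ! Prints 6: windows start at 1, 4, 7, 10, 13, 16; the last is partial.
  print '(a,i0)', 'output width = ', width
end program check_output_width

This is a ceiling-based convention; the more common floor-based one drops partial windows and would give 5 here.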
@@ -68,12 +72,12 @@ pure module subroutine forward(self, input)
do j = 1, self % width
! Compute the input window corresponding to output index j.
! With stride, the window starts at stride*(j-1) + 1 and spans kernel_size samples, clipped at input_width.
iws = j
iwe = j + self % kernel_size - 1
iws = self % stride * (j-1) + 1
iwe = min(iws + self % kernel_size - 1, input_width)

! For each filter, compute the convolution (inner product over channels and kernel width).
do concurrent (n = 1:self % filters)
self % z(n, j) = sum(self % kernel(n,:,:) * input(:,iws:iwe))
self % z(n, j) = sum(self % kernel(n,:,1:iwe-iws+1) * input(:,iws:iwe))
end do

! Add the bias for each filter.
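
Worked example of the clipping above: with input_width = 17, kernel_size = 3, and stride = 3, the last output index j = 6 gives iws = 3*(6-1) + 1 = 16 and iwe = min(18, 17) = 17, so only kernel columns 1:2 enter the sum; every earlier window uses the full kernel.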
@@ -92,7 +96,7 @@ pure module subroutine backward(self, input, gradient)
real, intent(in) :: input(:,:)
real, intent(in) :: gradient(:,:)

integer :: input_channels, input_width, output_width
integer :: input_channels, input_width
integer :: j, n, k
integer :: iws, iwe

@@ -104,7 +108,6 @@ pure module subroutine backward(self, input, gradient)
! Determine dimensions.
input_channels = size(input, dim=1)
input_width = size(input, dim=2)
output_width = self % width ! Note: output_width = input_width - kernel_size + 1

!--- Compute the local gradient gdz = (dL/dy) * sigma'(z) for each output.
gdz = gradient * self % activation % eval_prime(self % z)
@@ -120,14 +123,15 @@ pure module subroutine backward(self, input, gradient)
! In the forward pass the window for output index j was:
! iws = stride*(j-1) + 1, iwe = min(iws + kernel_size - 1, input_width).
do n = 1, self % filters
do j = 1, output_width
iws = j
iwe = j + self % kernel_size - 1
do j = 1, self % width
iws = self % stride * (j-1) + 1
iwe = min(iws + self % kernel_size - 1, input_width)

do k = 1, self % channels
! Weight gradient: accumulate contribution from the input window.
dw_local(n,k,:) = dw_local(n,k,:) + input(k,iws:iwe) * gdz(n,j)
dw_local(n,k,1:iwe-iws+1) = dw_local(n,k,1:iwe-iws+1) + input(k,iws:iwe) * gdz(n,j)
! Input gradient: propagate gradient back to the input window.
self % gradient(k,iws:iwe) = self % gradient(k,iws:iwe) + self % kernel(n,k,:) * gdz(n,j)
self % gradient(k,iws:iwe) = self % gradient(k,iws:iwe) + self % kernel(n,k,1:iwe-iws+1) * gdz(n,j)
end do
end do
end do
4 changes: 3 additions & 1 deletion src/nf/nf_conv2d_layer.f90
@@ -16,6 +16,7 @@ module nf_conv2d_layer
integer :: channels
integer :: kernel_size
integer :: filters
integer :: stride(2)

real, allocatable :: biases(:) ! size(filters)
real, allocatable :: kernel(:,:,:,:) ! filters x channels x window x window
@@ -40,12 +41,13 @@ module nf_conv2d_layer
end type conv2d_layer

interface conv2d_layer
module function conv2d_layer_cons(filters, kernel_size, activation) &
module function conv2d_layer_cons(filters, kernel_size, activation, stride) &
result(res)
!! `conv2d_layer` constructor function
integer, intent(in) :: filters
integer, intent(in) :: kernel_size
class(activation_function), intent(in) :: activation
integer, intent(in) :: stride(:)
type(conv2d_layer) :: res
end function conv2d_layer_cons
end interface conv2d_layer
51 changes: 33 additions & 18 deletions src/nf/nf_conv2d_layer_submodule.f90
@@ -7,16 +7,18 @@

contains

module function conv2d_layer_cons(filters, kernel_size, activation) result(res)
module function conv2d_layer_cons(filters, kernel_size, activation, stride) result(res)
implicit none
integer, intent(in) :: filters
integer, intent(in) :: kernel_size
class(activation_function), intent(in) :: activation
integer, intent(in) :: stride(:)
type(conv2d_layer) :: res

res % kernel_size = kernel_size
res % filters = filters
res % activation_name = activation % get_name()
res % stride = stride
allocate( res % activation, source = activation )

end function conv2d_layer_cons
@@ -28,8 +30,12 @@ module subroutine init(self, input_shape)
integer, intent(in) :: input_shape(:)

self % channels = input_shape(1)
self % width = input_shape(2) - self % kernel_size + 1
self % height = input_shape(3) - self % kernel_size + 1

self % width = (input_shape(2) - self % kernel_size) / self % stride(1) + 1
if (mod(input_shape(2) - self % kernel_size, self % stride(1)) /= 0) self % width = self % width + 1

self % height = (input_shape(3) - self % kernel_size) / self % stride(2) + 1
if (mod(input_shape(3) - self % kernel_size, self % stride(2)) /= 0) self % height = self % height + 1

! Output of shape filters x width x height
allocate(self % output(self % filters, self % width, self % height))
@@ -87,22 +93,24 @@ pure module subroutine forward(self, input)
iend = input_width - istart + 1
jend = input_height - jstart + 1

convolution: do concurrent(i = istart:iend, j = jstart:jend)
convolution: do concurrent(i = 1:self % width, j = 1:self % height)

! Start and end indices of the input data on the filter window
! (with stride they no longer coincide with the output indices i and j)
iws = i - half_window ! TODO kernel_width
iwe = i + half_window ! TODO kernel_width
jws = j - half_window ! TODO kernel_height
jwe = j + half_window ! TODO kernel_height
iws = istart + self % stride(1) * (i-1) - half_window ! TODO kernel_width
iwe = min(iws + 2*half_window, input_width) ! TODO kernel_width

jws = jstart + self % stride(2) * (j-1) - half_window ! TODO kernel_height
jwe = min(jws + 2*half_window, input_height) ! TODO kernel_height

! Compute the inner tensor product, sum(w_ij * x_ij), for each filter.
do concurrent(n = 1:self % filters)
self % z(n,iws,jws) = sum(self % kernel(n,:,:,:) * input(:,iws:iwe,jws:jwe))
self % z(n,i,j) = sum(self % kernel(n,:,1:iwe-iws+1,1:jwe-jws+1) * input(:,iws:iwe,jws:jwe))
end do

! Add bias to the inner product.
self % z(:,iws,jws) = self % z(:,iws,jws) + self % biases
self % z(:,i,j) = self % z(:,i,j) + self % biases

end do convolution
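
Index bookkeeping for the windows above, assuming istart = half_window + 1 as in the stride-1 version of this routine: iws = istart + stride(1)*(i-1) - half_window reduces to stride(1)*(i-1) + 1, the same convention as conv1d, and iwe = min(iws + 2*half_window, input_width) clips the trailing window so the kernel slice 1:iwe-iws+1 stays conformable with the input slice.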

@@ -158,21 +166,28 @@ pure module subroutine backward(self, input, gradient)
do concurrent( &
n = 1:self % filters, &
k = 1:self % channels, &
i = istart:iend, &
j = jstart:jend &
i = 1:self % width, &
j = 1:self % height &
)
! Start and end indices of the input data on the filter window
iws = i - half_window ! TODO kernel_width
iwe = i + half_window ! TODO kernel_width
jws = j - half_window ! TODO kernel_height
jwe = j + half_window ! TODO kernel_height
iws = istart + self % stride(1) * (i-1) - half_window ! TODO kernel_width
iwe = min(iws + 2*half_window, input_width) ! TODO kernel_width

jws = jstart + self % stride(2) * (j-1) - half_window ! TODO kernel_height
jwe = min(jws + 2*half_window, input_height) ! TODO kernel_height

! dL/dw = sum(dL/dy * sigma'(z) * x)
dw(n,k,1:iwe-iws+1,1:jwe-jws+1) = dw(n,k,1:iwe-iws+1,1:jwe-jws+1) + input(k,iws:iwe,jws:jwe) * gdz(n,i,j)

! dL/dx = dL/dy * sigma'(z) .inner. w
self % gradient(k,i,j) = self % gradient(k,i,j) &
+ sum(gdz(n,iws:iwe,jws:jwe) * self % kernel(n,k,:,:))
self % gradient(k,iws:iwe,jws:jwe) = self % gradient(k,iws:iwe,jws:jwe) &
+ gdz(n,i,j) * self % kernel(n,k,1:iwe-iws+1,1:jwe-jws+1)
Comment on lines +189 to +190

[Collaborator, author] @Riccardo231 Could you check this, please? I think it has a different behaviour now. However, I am not sure what the goal was before the change, because not all entries of self % gradient were updated (only the entries between istart:iend and jstart:jend were).

[Member] I think I implemented the conv2d variant... I'll look at this carefully over the weekend. It's possible that the original code was bad.

[Collaborator] I'll have a look tomorrow.

[Collaborator] Sorry, just to inform you that these days I'm really busy with school. I can probably have a look after the 5th. Sorry for the delay.

[Collaborator, author] No worries. Whenever you have time. Thank you.


end do
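
On the gradient question raised in the thread above: the old code gathered, for each input position in istart:iend, a window of output gradients, so border entries of self % gradient were never updated; the new code scatters each output's gradient over its input window, which does reach the borders. A minimal 1-d sketch of the scatter form under the same stride-and-clip convention (hypothetical standalone program, not the library routine):

program scatter_gradient_sketch
  implicit none
  integer, parameter :: input_len = 17, kernel_size = 3, stride = 3, width = 6
  real :: kernel(kernel_size), gdz(width), grad_in(input_len)
  integer :: j, iws, iwe
  kernel = [1.0, 2.0, 3.0]  ! hypothetical weights
  gdz = 1.0                 ! hypothetical upstream gradient, one value per output
  grad_in = 0.0
  do j = 1, width
    iws = stride * (j - 1) + 1                   ! window start, as in the patch
    iwe = min(iws + kernel_size - 1, input_len)  ! clip the trailing partial window
    grad_in(iws:iwe) = grad_in(iws:iwe) + kernel(1:iwe-iws+1) * gdz(j)
  end do
  print '(*(f6.1))', grad_in  ! every position covered by some window gets an update
end program scatter_gradient_sketch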

8 changes: 6 additions & 2 deletions src/nf/nf_layer_constructors.f90
@@ -94,7 +94,7 @@ end function input3d

interface conv

module function conv1d(filters, kernel_width, activation) result(res)
module function conv1d(filters, kernel_width, activation, stride) result(res)
!! 1-d convolutional layer constructor.
!!
!! This layer is for building 1-d convolutional network.
@@ -117,11 +117,13 @@ module function conv1d(filters, kernel_width, activation) result(res)
!! Width of the convolution window, commonly 3 or 5
class(activation_function), intent(in), optional :: activation
!! Activation function (default sigmoid)
integer, intent(in), optional :: stride
!! Stride length of the convolution
type(layer) :: res
!! Resulting layer instance
end function conv1d

module function conv2d(filters, kernel_width, kernel_height, activation) result(res)
module function conv2d(filters, kernel_width, kernel_height, activation, stride) result(res)
!! 2-d convolutional layer constructor.
!!
!! This layer is for building 2-d convolutional network.
@@ -147,6 +149,8 @@ module function conv2d(filters, kernel_width, kernel_height, activation) result(res)
!! Height of the convolution window, commonly 3 or 5
class(activation_function), intent(in), optional :: activation
!! Activation function (default sigmoid)
integer, intent(in), optional :: stride(:)
!! Stride length of the convolution
type(layer) :: res
!! Resulting layer instance
end function conv2d
33 changes: 29 additions & 4 deletions src/nf/nf_layer_constructors_submodule.f90
@@ -23,12 +23,14 @@

contains

module function conv1d(filters, kernel_width, activation) result(res)
module function conv1d(filters, kernel_width, activation, stride) result(res)
integer, intent(in) :: filters
integer, intent(in) :: kernel_width
class(activation_function), intent(in), optional :: activation
integer, intent(in), optional :: stride
type(layer) :: res

integer :: stride_tmp
class(activation_function), allocatable :: activation_tmp

res % name = 'conv1d'
@@ -41,20 +43,31 @@ module function conv1d(filters, kernel_width, activation) result(res)

res % activation = activation_tmp % get_name()

if (present(stride)) then
stride_tmp = stride
else
stride_tmp = 1
end if

if (stride_tmp < 1) &
error stop 'stride must be >= 1 in a conv1d layer'

allocate( &
res % p, &
source=conv1d_layer(filters, kernel_width, activation_tmp) &
source=conv1d_layer(filters, kernel_width, activation_tmp, stride_tmp) &
)

end function conv1d

module function conv2d(filters, kernel_width, kernel_height, activation) result(res)
module function conv2d(filters, kernel_width, kernel_height, activation, stride) result(res)
integer, intent(in) :: filters
integer, intent(in) :: kernel_width
integer, intent(in) :: kernel_height
class(activation_function), intent(in), optional :: activation
integer, intent(in), optional :: stride(:)
type(layer) :: res

integer, allocatable :: stride_tmp(:)
class(activation_function), allocatable :: activation_tmp

! Enforce kernel_width == kernel_height for now;
@@ -73,9 +86,21 @@ module function conv2d(filters, kernel_width, kernel_height, activation) result(res)

res % activation = activation_tmp % get_name()

if (present(stride)) then
stride_tmp = stride
else
stride_tmp = [1, 1]
end if

if (size(stride_tmp) /= 2) &
error stop 'size of stride must be equal to 2 in a conv2d layer'

if (stride_tmp(1) < 1 .or. stride_tmp(2) < 1) &
error stop 'stride must be >= 1 in a conv2d layer'

allocate( &
res % p, &
source=conv2d_layer(filters, kernel_width, activation_tmp) &
source=conv2d_layer(filters, kernel_width, activation_tmp, stride_tmp) &
)

end function conv2d
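
For reference, downstream usage of the new optional argument through the public conv interface; a sketch assuming the generic network and input constructors exported by the nf module, as in the repository's examples:

program strided_conv_usage
  use nf, only: network, input, conv
  implicit none
  type(network) :: net1d, net2d
  ! 1-d layer: stride is a scalar and defaults to 1 when omitted.
  net1d = network([input(1, 17), conv(filters=2, kernel_width=3, stride=3)])
  ! 2-d layer: stride is a length-2 array, one factor per spatial dimension.
  net2d = network([input(3, 32, 32), &
    conv(filters=16, kernel_width=3, kernel_height=3, stride=[2, 2])])
end program strided_conv_usage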
25 changes: 24 additions & 1 deletion test/test_conv1d_layer.f90
@@ -58,6 +58,7 @@ program test_conv1d_layer
select type(this_layer => input_layer % p); type is(input2d_layer)
call this_layer % set(sample_input)
end select
deallocate(sample_input)

call conv1d_layer % forward(input_layer)
call conv1d_layer % get_output(output)
@@ -67,11 +68,33 @@
write(stderr, '(a)') 'conv1d layer with zero input and sigmoid function must forward to all 0.5.. failed'
end if

! Minimal conv1d layer: 1 channel, input width 17, stride = 3
allocate(sample_input(1, 17))
sample_input = 0

input_layer = input(1, 17)
conv1d_layer = conv(filters, kernel_size, stride = 3)
call conv1d_layer % init(input_layer)

select type(this_layer => input_layer % p); type is(input2d_layer)
call this_layer % set(sample_input)
end select
deallocate(sample_input)

call conv1d_layer % forward(input_layer)
call conv1d_layer % get_output(output)

if (.not. all(abs(output - 0.5) < tolerance)) then
ok = .false.
write(stderr, '(a)') 'conv1d layer with stride 3, zero input, and sigmoid function must forward to all 0.5.. failed'
end if
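
For the configuration above (kernel_size = 3), the expected output width is (17 - 3) / 3 + 1 = 5, plus one for the trailing partial window since mod(14, 3) = 2, i.e. 6 positions per filter. A shape check would pin that down as well; a possible addition, assuming output keeps its filters x width layout:

if (.not. all(shape(output) == [filters, 6])) then
ok = .false.
write(stderr, '(a)') 'conv1d layer with stride 3 must produce output width 6.. failed'
end if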

! Final
if (ok) then
print '(a)', 'test_conv1d_layer: All tests passed.'
else
write(stderr, '(a)') 'test_conv1d_layer: One or more tests failed.'
stop 1
end if

end program test_conv1d_layer