From 127735896ba1a5a0d297c2cf009a543f01d5779c Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 25 Oct 2025 20:29:44 +0200 Subject: [PATCH 1/6] Addition of stride in API of conv --- src/nf/nf_conv1d_layer.f90 | 4 ++- src/nf/nf_conv1d_layer_submodule.f90 | 4 ++- src/nf/nf_conv2d_layer.f90 | 4 ++- src/nf/nf_conv2d_layer_submodule.f90 | 4 ++- src/nf/nf_layer_constructors.f90 | 8 ++++-- src/nf/nf_layer_constructors_submodule.f90 | 33 +++++++++++++++++++--- 6 files changed, 47 insertions(+), 10 deletions(-) diff --git a/src/nf/nf_conv1d_layer.f90 b/src/nf/nf_conv1d_layer.f90 index 0877092c..a6b29fa0 100644 --- a/src/nf/nf_conv1d_layer.f90 +++ b/src/nf/nf_conv1d_layer.f90 @@ -15,6 +15,7 @@ module nf_conv1d_layer integer :: channels integer :: kernel_size integer :: filters + integer :: stride real, allocatable :: biases(:) ! size(filters) real, allocatable :: kernel(:,:,:) ! filters x channels x window @@ -39,12 +40,13 @@ module nf_conv1d_layer end type conv1d_layer interface conv1d_layer - module function conv1d_layer_cons(filters, kernel_size, activation) & + module function conv1d_layer_cons(filters, kernel_size, activation, stride) & result(res) !! `conv1d_layer` constructor function integer, intent(in) :: filters integer, intent(in) :: kernel_size class(activation_function), intent(in) :: activation + integer, intent(in) :: stride type(conv1d_layer) :: res end function conv1d_layer_cons end interface conv1d_layer diff --git a/src/nf/nf_conv1d_layer_submodule.f90 b/src/nf/nf_conv1d_layer_submodule.f90 index 391a8bcc..5bcdfe19 100644 --- a/src/nf/nf_conv1d_layer_submodule.f90 +++ b/src/nf/nf_conv1d_layer_submodule.f90 @@ -7,15 +7,17 @@ contains - module function conv1d_layer_cons(filters, kernel_size, activation) result(res) + module function conv1d_layer_cons(filters, kernel_size, activation, stride) result(res) integer, intent(in) :: filters integer, intent(in) :: kernel_size class(activation_function), intent(in) :: activation + integer, intent(in) :: stride type(conv1d_layer) :: res res % kernel_size = kernel_size res % filters = filters res % activation_name = activation % get_name() + res % stride = stride allocate( res % activation, source = activation ) end function conv1d_layer_cons diff --git a/src/nf/nf_conv2d_layer.f90 b/src/nf/nf_conv2d_layer.f90 index 0b9c7356..676fadbb 100644 --- a/src/nf/nf_conv2d_layer.f90 +++ b/src/nf/nf_conv2d_layer.f90 @@ -16,6 +16,7 @@ module nf_conv2d_layer integer :: channels integer :: kernel_size integer :: filters + integer :: stride(2) real, allocatable :: biases(:) ! size(filters) real, allocatable :: kernel(:,:,:,:) ! filters x channels x window x window @@ -40,12 +41,13 @@ module nf_conv2d_layer end type conv2d_layer interface conv2d_layer - module function conv2d_layer_cons(filters, kernel_size, activation) & + module function conv2d_layer_cons(filters, kernel_size, activation, stride) & result(res) !! 
`conv2d_layer` constructor function integer, intent(in) :: filters integer, intent(in) :: kernel_size class(activation_function), intent(in) :: activation + integer, intent(in) :: stride(:) type(conv2d_layer) :: res end function conv2d_layer_cons end interface conv2d_layer diff --git a/src/nf/nf_conv2d_layer_submodule.f90 b/src/nf/nf_conv2d_layer_submodule.f90 index f4ee450c..056c33f0 100644 --- a/src/nf/nf_conv2d_layer_submodule.f90 +++ b/src/nf/nf_conv2d_layer_submodule.f90 @@ -7,16 +7,18 @@ contains - module function conv2d_layer_cons(filters, kernel_size, activation) result(res) + module function conv2d_layer_cons(filters, kernel_size, activation, stride) result(res) implicit none integer, intent(in) :: filters integer, intent(in) :: kernel_size class(activation_function), intent(in) :: activation + integer, intent(in) :: stride(:) type(conv2d_layer) :: res res % kernel_size = kernel_size res % filters = filters res % activation_name = activation % get_name() + res % stride = stride allocate( res % activation, source = activation ) end function conv2d_layer_cons diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index 12492311..7b48f919 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -94,7 +94,7 @@ end function input3d interface conv - module function conv1d(filters, kernel_width, activation) result(res) + module function conv1d(filters, kernel_width, activation, stride) result(res) !! 1-d convolutional layer constructor. !! !! This layer is for building 1-d convolutional network. @@ -117,11 +117,13 @@ module function conv1d(filters, kernel_width, activation) result(res) !! Width of the convolution window, commonly 3 or 5 class(activation_function), intent(in), optional :: activation !! Activation function (default sigmoid) + integer, intent(in), optional :: stride + !! Stride length of the convolution type(layer) :: res !! Resulting layer instance end function conv1d - module function conv2d(filters, kernel_width, kernel_height, activation) result(res) + module function conv2d(filters, kernel_width, kernel_height, activation, stride) result(res) !! 2-d convolutional layer constructor. !! !! This layer is for building 2-d convolutional network. @@ -147,6 +149,8 @@ module function conv2d(filters, kernel_width, kernel_height, activation) result( !! Height of the convolution window, commonly 3 or 5 class(activation_function), intent(in), optional :: activation !! Activation function (default sigmoid) + integer, intent(in), optional :: stride(:) + !! Stride length of the convolution type(layer) :: res !! 
Resulting layer instance end function conv2d diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 594575fe..04b2cfc3 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -23,14 +23,19 @@ contains - module function conv1d(filters, kernel_width, activation) result(res) + module function conv1d(filters, kernel_width, activation, stride) result(res) integer, intent(in) :: filters integer, intent(in) :: kernel_width class(activation_function), intent(in), optional :: activation + integer, intent(in), optional :: stride type(layer) :: res + integer :: stride_tmp class(activation_function), allocatable :: activation_tmp + if (stride < 1) & + error stop 'stride must be >= 1 in a conv1d layer' + res % name = 'conv1d' if (present(activation)) then @@ -41,20 +46,28 @@ module function conv1d(filters, kernel_width, activation) result(res) res % activation = activation_tmp % get_name() + if (present(stride)) then + stride_tmp = stride + else + stride_tmp = 1 + endif + allocate( & res % p, & - source=conv1d_layer(filters, kernel_width, activation_tmp) & + source=conv1d_layer(filters, kernel_width, activation_tmp, stride_tmp) & ) end function conv1d - module function conv2d(filters, kernel_width, kernel_height, activation) result(res) + module function conv2d(filters, kernel_width, kernel_height, activation, stride) result(res) integer, intent(in) :: filters integer, intent(in) :: kernel_width integer, intent(in) :: kernel_height class(activation_function), intent(in), optional :: activation + integer, intent(in), optional :: stride(:) type(layer) :: res + integer :: stride_tmp(2) class(activation_function), allocatable :: activation_tmp ! Enforce kernel_width == kernel_height for now; @@ -63,6 +76,12 @@ module function conv2d(filters, kernel_width, kernel_height, activation) result( if (kernel_width /= kernel_height) & error stop 'kernel_width must equal kernel_height in a conv2d layer' + if (size(stride) /= 2 ) & + error stop 'size of stride must be equal to 2 in a conv2d layer' + + if (stride(1) < 1 .or. stride(2) < 1) & + error stop 'stride must be >= 1 in a conv2d layer' + res % name = 'conv2d' if (present(activation)) then @@ -73,9 +92,15 @@ module function conv2d(filters, kernel_width, kernel_height, activation) result( res % activation = activation_tmp % get_name() + if (present(stride)) then + stride_tmp = stride + else + stride_tmp = [1, 1] + endif + allocate( & res % p, & - source=conv2d_layer(filters, kernel_width, activation_tmp) & + source=conv2d_layer(filters, kernel_width, activation_tmp, stride) & ) end function conv2d From e73af4a1667d4950deb833f6658988bcadec3621 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 25 Oct 2025 20:31:09 +0200 Subject: [PATCH 2/6] implementation of stride in conv1d --- src/nf/nf_conv1d_layer_submodule.f90 | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/nf/nf_conv1d_layer_submodule.f90 b/src/nf/nf_conv1d_layer_submodule.f90 index 5bcdfe19..f12bf2b1 100644 --- a/src/nf/nf_conv1d_layer_submodule.f90 +++ b/src/nf/nf_conv1d_layer_submodule.f90 @@ -27,7 +27,7 @@ module subroutine init(self, input_shape) integer, intent(in) :: input_shape(:) self % channels = input_shape(1) - self % width = input_shape(2) - self % kernel_size + 1 + self % width = (input_shape(2) - self % kernel_size + 1) / self % stride ! 
Output of shape: filters x width allocate(self % output(self % filters, self % width)) @@ -70,12 +70,12 @@ pure module subroutine forward(self, input) do j = 1, self % width ! Compute the input window corresponding to output index j. ! In forward: center index = j + half_window, so window = indices j to j+kernel_size-1. - iws = j - iwe = j + self % kernel_size - 1 + iws = self % stride * (j-1) + 1 + iwe = max(iws + self % kernel_size - 1, input_width) ! For each filter, compute the convolution (inner product over channels and kernel width). do concurrent (n = 1:self % filters) - self % z(n, j) = sum(self % kernel(n,:,:) * input(:,iws:iwe)) + self % z(n, j) = sum(self % kernel(n,:,1:iwe-iws+1) * input(:,iws:iwe)) end do ! Add the bias for each filter. @@ -94,7 +94,7 @@ pure module subroutine backward(self, input, gradient) real, intent(in) :: input(:,:) real, intent(in) :: gradient(:,:) - integer :: input_channels, input_width, output_width + integer :: input_channels, input_width integer :: j, n, k integer :: iws, iwe @@ -106,7 +106,6 @@ pure module subroutine backward(self, input, gradient) ! Determine dimensions. input_channels = size(input, dim=1) input_width = size(input, dim=2) - output_width = self % width ! Note: output_width = input_width - kernel_size + 1 !--- Compute the local gradient gdz = (dL/dy) * sigma'(z) for each output. gdz = gradient * self % activation % eval_prime(self % z) @@ -122,14 +121,15 @@ pure module subroutine backward(self, input, gradient) ! In the forward pass the window for output index j was: ! iws = j, iwe = j + kernel_size - 1. do n = 1, self % filters - do j = 1, output_width - iws = j - iwe = j + self % kernel_size - 1 + do j = 1, self % width + iws = self % stride * (j-1) + 1 + iwe = max(iws + self % kernel_size - 1, input_width) + do k = 1, self % channels ! Weight gradient: accumulate contribution from the input window. - dw_local(n,k,:) = dw_local(n,k,:) + input(k,iws:iwe) * gdz(n,j) + dw_local(n,k,1:iws-iwe+1) = dw_local(n,k,1:iws-iwe+1) + input(k,iws:iwe) * gdz(n,j) ! Input gradient: propagate gradient back to the input window. - self % gradient(k,iws:iwe) = self % gradient(k,iws:iwe) + self % kernel(n,k,:) * gdz(n,j) + self % gradient(k,iws:iwe) = self % gradient(k,iws:iwe) + self % kernel(n,k,1:iws-iwe+1) * gdz(n,j) end do end do end do From eb7e112649734cdec2c40073cd3ac9a67d141611 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 25 Oct 2025 20:35:20 +0200 Subject: [PATCH 3/6] start implementation of stride in conv2d --- src/nf/nf_conv2d_layer_submodule.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_conv2d_layer_submodule.f90 b/src/nf/nf_conv2d_layer_submodule.f90 index 056c33f0..3afc0397 100644 --- a/src/nf/nf_conv2d_layer_submodule.f90 +++ b/src/nf/nf_conv2d_layer_submodule.f90 @@ -30,8 +30,8 @@ module subroutine init(self, input_shape) integer, intent(in) :: input_shape(:) self % channels = input_shape(1) - self % width = input_shape(2) - self % kernel_size + 1 - self % height = input_shape(3) - self % kernel_size + 1 + self % width = (input_shape(2) - self % kernel_size + 1) / self % stride(1) + self % height = (input_shape(3) - self % kernel_size + 1) / self % stride(2) ! 
Output of shape filters x width x height
     allocate(self % output(self % filters, self % width, self % height))
 

From 824bb130eb0cf8a1c2e2af647f26a1a5b7f5f75c Mon Sep 17 00:00:00 2001
From: Jeremie Vandenplas
Date: Fri, 31 Oct 2025 15:39:04 +0100
Subject: [PATCH 4/6] Fix conv1d with stride

---
 src/nf/nf_conv1d_layer.f90                 |  2 +-
 src/nf/nf_conv1d_layer_submodule.f90       |  8 ++++---
 src/nf/nf_layer_constructors_submodule.f90 |  6 ++---
 test/test_conv1d_layer.f90                 | 27 ++++++++++++++++++++--
 4 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/src/nf/nf_conv1d_layer.f90 b/src/nf/nf_conv1d_layer.f90
index a6b29fa0..cbc5847c 100644
--- a/src/nf/nf_conv1d_layer.f90
+++ b/src/nf/nf_conv1d_layer.f90
@@ -63,7 +63,7 @@ module subroutine init(self, input_shape)
         !! Input layer dimensions
     end subroutine init
 
-    pure module subroutine forward(self, input)
+    module subroutine forward(self, input)
       !! Apply a forward pass on the `conv1d` layer.
       class(conv1d_layer), intent(in out) :: self
         !! A `conv1d_layer` instance

diff --git a/src/nf/nf_conv1d_layer_submodule.f90 b/src/nf/nf_conv1d_layer_submodule.f90
index f12bf2b1..04ef3f82 100644
--- a/src/nf/nf_conv1d_layer_submodule.f90
+++ b/src/nf/nf_conv1d_layer_submodule.f90
@@ -27,7 +27,9 @@ module subroutine init(self, input_shape)
     integer, intent(in) :: input_shape(:)
 
     self % channels = input_shape(1)
-    self % width = (input_shape(2) - self % kernel_size + 1) / self % stride
+    self % width = (input_shape(2) - self % kernel_size) / self % stride + 1
+
+    if (mod(input_shape(2) - self % kernel_size, self % stride) /= 0) self % width = self % width + 1
 
     ! Output of shape: filters x width
     allocate(self % output(self % filters, self % width))
@@ -55,7 +57,7 @@ module subroutine init(self, input_shape)
 
   end subroutine init
 
-  pure module subroutine forward(self, input)
+  module subroutine forward(self, input)
     implicit none
     class(conv1d_layer), intent(in out) :: self
     real, intent(in) :: input(:,:)
@@ -71,7 +73,7 @@ pure module subroutine forward(self, input)
       ! Compute the input window corresponding to output index j.
       ! In forward: center index = j + half_window, so window = indices j to j+kernel_size-1.
       iws = self % stride * (j-1) + 1
-      iwe = max(iws + self % kernel_size - 1, input_width)
+      iwe = min(iws + self % kernel_size - 1, input_width)
 
       ! For each filter, compute the convolution (inner product over channels and kernel width).
      do concurrent (n = 1:self % filters)

diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90
index 04b2cfc3..b7b2815e 100644
--- a/src/nf/nf_layer_constructors_submodule.f90
+++ b/src/nf/nf_layer_constructors_submodule.f90
@@ -33,9 +33,6 @@ module function conv1d(filters, kernel_width, activation, stride) result(res)
     integer :: stride_tmp
     class(activation_function), allocatable :: activation_tmp
 
-    if (stride < 1) &
-      error stop 'stride must be >= 1 in a conv1d layer'
-
     res % name = 'conv1d'
 
     if (present(activation)) then
@@ -52,6 +49,9 @@ module function conv1d(filters, kernel_width, activation, stride) result(res)
       stride_tmp = 1
     endif
 
+    if (stride_tmp < 1) &
+      error stop 'stride must be >= 1 in a conv1d layer'
+
     allocate( &
       res % p, &
       source=conv1d_layer(filters, kernel_width, activation_tmp, stride_tmp) &

diff --git a/test/test_conv1d_layer.f90 b/test/test_conv1d_layer.f90
index b80b520b..f03dc552 100644
--- a/test/test_conv1d_layer.f90
+++ b/test/test_conv1d_layer.f90
@@ -58,6 +58,7 @@ program test_conv1d_layer
   select type(this_layer => input_layer % p); type is(input2d_layer)
     call this_layer % set(sample_input)
   end select
+  deallocate(sample_input)
 
   call conv1d_layer % forward(input_layer)
   call conv1d_layer % get_output(output)
@@ -67,11 +68,33 @@ program test_conv1d_layer
     write(stderr, '(a)') 'conv1d layer with zero input and sigmoid function must forward to all 0.5.. failed'
   end if
 
+  ! Minimal strided conv1d layer: 1 channel, 17-sample input, stride = 3;
+  allocate(sample_input(1, 17))
+  sample_input = 0
+
+  input_layer = input(1, 17)
+  conv1d_layer = conv(filters, kernel_size, stride = 3)
+  call conv1d_layer % init(input_layer)
+
+  select type(this_layer => input_layer % p); type is(input2d_layer)
+    call this_layer % set(sample_input)
+  end select
+  deallocate(sample_input)
+
+  call conv1d_layer % forward(input_layer)
+  call conv1d_layer % get_output(output)
+
+  if (.not. all(abs(output) < tolerance)) then
+    ok = .false.
+    write(stderr, '(a)') 'conv1d layer with zero input and sigmoid function must forward to all 0.5.. failed'
+  end if
+
+  ! Final summary
   if (ok) then
     print '(a)', 'test_conv1d_layer: All tests passed.'
   else
     write(stderr, '(a)') 'test_conv1d_layer: One or more tests failed.'
-    stop 1
+    stop 2
   end if
-  
+
 end program test_conv1d_layer

From 553a55e3391488d1e7b183e72b857fc2612f62d8 Mon Sep 17 00:00:00 2001
From: Jeremie Vandenplas
Date: Fri, 31 Oct 2025 17:40:51 +0100
Subject: [PATCH 5/6] Implementation of stride in conv2d

---
 src/nf/nf_conv1d_layer.f90                 |  2 +-
 src/nf/nf_conv1d_layer_submodule.f90       |  8 ++--
 src/nf/nf_conv2d_layer_submodule.f90       | 47 ++++++++++++++--------
 src/nf/nf_layer_constructors_submodule.f90 | 16 ++++----
 test/test_conv2d_layer.f90                 | 25 ++++++++++++
 5 files changed, 68 insertions(+), 30 deletions(-)

diff --git a/src/nf/nf_conv1d_layer.f90 b/src/nf/nf_conv1d_layer.f90
index cbc5847c..a6b29fa0 100644
--- a/src/nf/nf_conv1d_layer.f90
+++ b/src/nf/nf_conv1d_layer.f90
@@ -63,7 +63,7 @@ module subroutine init(self, input_shape)
         !! Input layer dimensions
     end subroutine init
 
-    module subroutine forward(self, input)
+    pure module subroutine forward(self, input)
       !! Apply a forward pass on the `conv1d` layer.
       class(conv1d_layer), intent(in out) :: self
        !! A `conv1d_layer` instance

diff --git a/src/nf/nf_conv1d_layer_submodule.f90 b/src/nf/nf_conv1d_layer_submodule.f90
index 04ef3f82..b93a6ed8 100644
--- a/src/nf/nf_conv1d_layer_submodule.f90
+++ b/src/nf/nf_conv1d_layer_submodule.f90
@@ -57,7 +57,7 @@ module subroutine init(self, input_shape)
 
   end subroutine init
 
-  module subroutine forward(self, input)
+  pure module subroutine forward(self, input)
    implicit none
    class(conv1d_layer), intent(in out) :: self
    real, intent(in) :: input(:,:)
@@ -125,13 +125,13 @@ pure module subroutine backward(self, input, gradient)
    do n = 1, self % filters
      do j = 1, self % width
        iws = self % stride * (j-1) + 1
-        iwe = max(iws + self % kernel_size - 1, input_width)
+        iwe = min(iws + self % kernel_size - 1, input_width)
 
        do k = 1, self % channels
          ! Weight gradient: accumulate contribution from the input window.
-          dw_local(n,k,1:iws-iwe+1) = dw_local(n,k,1:iws-iwe+1) + input(k,iws:iwe) * gdz(n,j)
+          dw_local(n,k,1:iwe-iws+1) = dw_local(n,k,1:iwe-iws+1) + input(k,iws:iwe) * gdz(n,j)
          ! Input gradient: propagate gradient back to the input window.
-          self % gradient(k,iws:iwe) = self % gradient(k,iws:iwe) + self % kernel(n,k,1:iws-iwe+1) * gdz(n,j)
+          self % gradient(k,iws:iwe) = self % gradient(k,iws:iwe) + self % kernel(n,k,1:iwe-iws+1) * gdz(n,j)
        end do
      end do
    end do

diff --git a/src/nf/nf_conv2d_layer_submodule.f90 b/src/nf/nf_conv2d_layer_submodule.f90
index 3afc0397..308c90a2 100644
--- a/src/nf/nf_conv2d_layer_submodule.f90
+++ b/src/nf/nf_conv2d_layer_submodule.f90
@@ -30,8 +30,12 @@ module subroutine init(self, input_shape)
     integer, intent(in) :: input_shape(:)
 
     self % channels = input_shape(1)
-    self % width = (input_shape(2) - self % kernel_size + 1) / self % stride(1)
-    self % height = (input_shape(3) - self % kernel_size + 1) / self % stride(2)
+
+    self % width = (input_shape(2) - self % kernel_size) / self % stride(1) + 1
+    if (mod(input_shape(2) - self % kernel_size, self % stride(1)) /= 0) self % width = self % width + 1
+
+    self % height = (input_shape(3) - self % kernel_size) / self % stride(2) + 1
+    if (mod(input_shape(3) - self % kernel_size, self % stride(2)) /= 0) self % height = self % height + 1
 
     ! Output of shape filters x width x height
     allocate(self % output(self % filters, self % width, self % height))
@@ -89,22 +93,24 @@ pure module subroutine forward(self, input)
     iend = input_width - istart + 1
     jend = input_height - jstart + 1
 
-    convolution: do concurrent(i = istart:iend, j = jstart:jend)
+!    convolution: do concurrent(i = istart:iend, j = jstart:jend)
+    convolution: do concurrent(i = 1:self % width, j = 1:self % height)
 
       ! Start and end indices of the input data on the filter window
-      ! iws and jws are also coincidentally the indices of the output matrix
-      iws = i - half_window ! TODO kernel_width
-      iwe = i + half_window ! TODO kernel_width
-      jws = j - half_window ! TODO kernel_height
-      jwe = j + half_window ! TODO kernel_height
+      ! (i,j) index the output matrix; (iws:iwe, jws:jwe) is the input window
+      iws = istart + self % stride(1) * (i-1) - half_window ! TODO kernel_width
+      iwe = min(iws + 2*half_window, input_width) ! TODO kernel_width
+
+      jws = jstart + self % stride(2) * (j-1) - half_window ! TODO kernel_height
+      jwe = min(jws + 2*half_window, input_height) ! TODO kernel_height
 
      ! Compute the inner tensor product, sum(w_ij * x_ij), for each filter.
      do concurrent(n = 1:self % filters)
-        self % z(n,iws,jws) = sum(self % kernel(n,:,:,:) * input(:,iws:iwe,jws:jwe))
+        self % z(n,i,j) = sum(self % kernel(n,:,1:iwe-iws+1,1:jwe-jws+1) * input(:,iws:iwe,jws:jwe))
      end do
 
      ! Add bias to the inner product.
-      self % z(:,iws,jws) = self % z(:,iws,jws) + self % biases
+      self % z(:,i,j) = self % z(:,i,j) + self % biases
 
    end do convolution
 
@@ -160,21 +166,28 @@ pure module subroutine backward(self, input, gradient)
     do concurrent( &
       n = 1:self % filters, &
       k = 1:self % channels, &
-      i = istart:iend, &
-      j = jstart:jend &
+      i = 1:self % width, &
+      j = 1:self % height &
+      !i = istart:iend, &
+      !j = jstart:jend &
    )
      ! Start and end indices of the input data on the filter window
-      iws = i - half_window ! TODO kernel_width
-      iwe = i + half_window ! TODO kernel_width
-      jws = j - half_window ! TODO kernel_height
-      jwe = j + half_window ! TODO kernel_height
+      !iws = i - half_window ! TODO kernel_width
+      !iwe = i + half_window ! TODO kernel_width
+      !jws = j - half_window ! TODO kernel_height
+      !jwe = j + half_window ! TODO kernel_height
+      iws = istart + self % stride(1) * (i-1) - half_window ! TODO kernel_width
+      iwe = min(iws + 2*half_window, input_width) ! TODO kernel_width
+
+      jws = jstart + self % stride(2) * (j-1) - half_window ! TODO kernel_height
+      jwe = min(jws + 2*half_window, input_height) ! TODO kernel_height
 
      ! dL/dw = sum(dL/dy * sigma'(z) * x)
-      dw(n,k,:,:) = dw(n,k,:,:) + input(k,iws:iwe,jws:jwe) * gdz(n,iws:iwe,jws:jwe)
+      dw(n,k,1:iwe-iws+1,1:jwe-jws+1) = dw(n,k,1:iwe-iws+1,1:jwe-jws+1) + input(k,iws:iwe,jws:jwe) * gdz(n,i,j)
 
      ! dL/dx = dL/dy * sigma'(z) .inner. w
-      self % gradient(k,i,j) = self % gradient(k,i,j) &
-        + sum(gdz(n,iws:iwe,jws:jwe) * self % kernel(n,k,:,:))
+      self % gradient(k,iws:iwe,jws:jwe) = self % gradient(k,iws:iwe,jws:jwe) &
+        + gdz(n,i,j) * self % kernel(n,k,1:iwe-iws+1,1:jwe-jws+1)
 
    end do

diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90
index b7b2815e..fc630fd2 100644
--- a/src/nf/nf_layer_constructors_submodule.f90
+++ b/src/nf/nf_layer_constructors_submodule.f90
@@ -67,7 +67,7 @@ module function conv2d(filters, kernel_width, kernel_height, activation, stride) result(res)
     integer, intent(in), optional :: stride(:)
     type(layer) :: res
 
-    integer :: stride_tmp(2)
+    integer, allocatable :: stride_tmp(:)
     class(activation_function), allocatable :: activation_tmp
 
     ! Enforce kernel_width == kernel_height for now;
@@ -76,12 +76,6 @@ module function conv2d(filters, kernel_width, kernel_height, activation, stride) result(res)
     if (kernel_width /= kernel_height) &
       error stop 'kernel_width must equal kernel_height in a conv2d layer'
 
-    if (size(stride) /= 2 ) &
-      error stop 'size of stride must be equal to 2 in a conv2d layer'
-
-    if (stride(1) < 1 .or. stride(2) < 1) &
-      error stop 'stride must be >= 1 in a conv2d layer'
-
     res % name = 'conv2d'
 
     if (present(activation)) then
@@ -98,9 +92,15 @@ module function conv2d(filters, kernel_width, kernel_height, activation, stride) result(res)
       stride_tmp = [1, 1]
     endif
 
+    if (size(stride_tmp) /= 2) &
+      error stop 'size of stride must be equal to 2 in a conv2d layer'
+
+    if (stride_tmp(1) < 1 .or. stride_tmp(2) < 1) &
+      error stop 'stride must be >= 1 in a conv2d layer'
+
     allocate( &
       res % p, &
-      source=conv2d_layer(filters, kernel_width, activation_tmp, stride) &
+      source=conv2d_layer(filters, kernel_width, activation_tmp, stride_tmp) &
     )
 
   end function conv2d

diff --git a/test/test_conv2d_layer.f90 b/test/test_conv2d_layer.f90
index 2d5868b9..3ee10dc4 100644
--- a/test/test_conv2d_layer.f90
+++ b/test/test_conv2d_layer.f90
@@ -59,6 +59,30 @@ program test_conv2d_layer
     call this_layer % set(sample_input)
   end select
 
+  deallocate(sample_input)
+
+  call conv_layer % forward(input_layer)
+  call conv_layer % get_output(output)
+
+  if (.not. all(abs(output) < tolerance)) then
+    ok = .false.
+    write(stderr, '(a)') 'conv2d layer with zero input and sigmoid function must forward to all 0.5.. failed'
+  end if
+
+  ! Minimal strided conv2d layer: 1 channel, 17x17 pixel image, stride = [3, 4];
+  allocate(sample_input(1, 17, 17))
+  sample_input = 0
+
+  input_layer = input(1, 17, 17)
+  conv_layer = conv(filters, kernel_size, kernel_size, stride=[3, 4])
+  call conv_layer % init(input_layer)
+
+  select type(this_layer => input_layer % p); type is(input3d_layer)
+    call this_layer % set(sample_input)
+  end select
+
+  deallocate(sample_input)
+
   call conv_layer % forward(input_layer)
   call conv_layer % get_output(output)
 
   if (.not. all(abs(output) < tolerance)) then
     ok = .false.
     write(stderr, '(a)') 'conv2d layer with zero input and sigmoid function must forward to all 0.5.. failed'
   end if
 
+  ! Summary
   if (ok) then
     print '(a)', 'test_conv2d_layer: All tests passed.'
   else

From f500d49049745b5b2e5312301863c51fda3255af Mon Sep 17 00:00:00 2001
From: Jeremie Vandenplas
Date: Fri, 31 Oct 2025 17:51:02 +0100
Subject: [PATCH 6/6] Apply suggestions from code review

---
 test/test_conv1d_layer.f90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_conv1d_layer.f90 b/test/test_conv1d_layer.f90
index f03dc552..95cbf021 100644
--- a/test/test_conv1d_layer.f90
+++ b/test/test_conv1d_layer.f90
@@ -94,7 +94,7 @@ program test_conv1d_layer
     print '(a)', 'test_conv1d_layer: All tests passed.'
   else
     write(stderr, '(a)') 'test_conv1d_layer: One or more tests failed.'
-    stop 2
+    stop 1
   end if
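
A standalone sketch of the output-size rule used by the patched init routines (an illustration of the arithmetic in this series, not part of the patches): window starts are 1, 1+stride, 1+2*stride, and so on, and because forward clamps the window end with min(iws + kernel_size - 1, input_width), a trailing partial window still produces an output element, hence the extra increment when the integer division leaves a remainder.

    program stride_output_size
      implicit none
      integer :: input_width, kernel_size, stride, width

      input_width = 17
      kernel_size = 3
      stride = 3

      ! Same arithmetic as the patched conv1d init:
      ! one output per full stride step, plus one more
      ! when a partial window remains at the end.
      width = (input_width - kernel_size) / stride + 1
      if (mod(input_width - kernel_size, stride) /= 0) width = width + 1

      print '(a,i0)', 'output width = ', width  ! prints "output width = 6"
    end program stride_output_size

For the 1x17 input with kernel_size 3 and stride 3 exercised in test_conv1d_layer.f90, the windows start at samples 1, 4, 7, 10, 13, and 16, with the last window covering only samples 16:17.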
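A usage sketch of the extended constructor API (the top-level nf module and the network constructor are assumed from the surrounding project; only conv, input, and the stride argument come from these patches):

    program conv_stride_usage
      use nf, only: conv, input, network
      implicit none
      type(network) :: net

      ! For the 2-d case, stride is a size-2 array:
      ! one downsampling factor per spatial dimension.
      net = network([ &
        input(3, 32, 32), &
        conv(filters=16, kernel_width=3, kernel_height=3, stride=[2, 2]) &
      ])
    end program conv_stride_usage

Because stride is optional and defaults to 1 (or [1, 1] for conv2d), existing callers compile and behave exactly as before.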