diff --git a/internal/controller/provisioner/objects.go b/internal/controller/provisioner/objects.go index 8ad2e1b72b..26e5effd6b 100644 --- a/internal/controller/provisioner/objects.go +++ b/internal/controller/provisioner/objects.go @@ -686,26 +686,8 @@ func (p *NginxProvisioner) buildNginxDeployment( } } - var replicas *int32 - if deploymentCfg.Replicas != nil { - replicas = deploymentCfg.Replicas - } - - if isAutoscalingEnabled(&deploymentCfg) { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - hpa := &autoscalingv2.HorizontalPodAutoscaler{} - err := p.k8sClient.Get(ctx, types.NamespacedName{ - Namespace: objectMeta.Namespace, - Name: objectMeta.Name, - }, hpa) - if err == nil && hpa.Status.DesiredReplicas > 0 { - // overwrite with HPA's desiredReplicas - replicas = helpers.GetPointer(hpa.Status.DesiredReplicas) - } - } - + // Determine replica count based on HPA status + replicas := p.determineReplicas(objectMeta, deploymentCfg) if replicas != nil { deployment.Spec.Replicas = replicas } @@ -713,6 +695,60 @@ func (p *NginxProvisioner) buildNginxDeployment( return deployment, nil } +// determineReplicas determines the appropriate replica count for a deployment based on HPA status. +// +// HPA Replicas Management Strategy: +// +// When an HPA is managing a deployment, we must read the current deployment's replicas +// from the cluster and use that value, rather than trying to set our own value or read +// from HPA.Status.DesiredReplicas (which is eventually consistent and stale). +// +// Why we can't use HPA.Status.DesiredReplicas: +// - HPA.Status updates lag behind Deployment.Spec.Replicas changes +// - When HPA scales down: HPA writes Deployment.Spec → then updates its own Status +// - If we read Status during this window, we get the OLD value and overwrite HPA's new value +// - This creates a race condition causing pod churn +// +// Our approach: +// - When HPA exists: Read current deployment replicas from cluster and use that +// - When HPA doesn't exist yet: Set replicas for initial deployment creation +// - When HPA exists but Deployment doesn't exist yet: Set replicas for initial deployment creation +// - When HPA is disabled: Set replicas normally. +func (p *NginxProvisioner) determineReplicas( + objectMeta metav1.ObjectMeta, + deploymentCfg ngfAPIv1alpha2.DeploymentSpec, +) *int32 { + replicas := deploymentCfg.Replicas + + if !isAutoscalingEnabled(&deploymentCfg) { + return replicas + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + hpa := &autoscalingv2.HorizontalPodAutoscaler{} + err := p.k8sClient.Get(ctx, types.NamespacedName{ + Namespace: objectMeta.Namespace, + Name: objectMeta.Name, + }, hpa) + if err != nil { + return replicas + } + + existingDeployment := &appsv1.Deployment{} + err = p.k8sClient.Get(ctx, types.NamespacedName{ + Namespace: objectMeta.Namespace, + Name: objectMeta.Name, + }, existingDeployment) + + if err == nil && existingDeployment.Spec.Replicas != nil { + replicas = existingDeployment.Spec.Replicas + } + + return replicas +} + // applyPatches applies the provided patches to the given object. func applyPatches(obj client.Object, patches []ngfAPIv1alpha2.Patch) error { if len(patches) == 0 { diff --git a/internal/controller/provisioner/objects_test.go b/internal/controller/provisioner/objects_test.go index cb5f4f4fd7..d19bd6e1e2 100644 --- a/internal/controller/provisioner/objects_test.go +++ b/internal/controller/provisioner/objects_test.go @@ -392,78 +392,156 @@ func TestBuildNginxResourceObjects_NginxProxyConfig(t *testing.T) { func TestBuildNginxResourceObjects_DeploymentReplicasFromHPA(t *testing.T) { t.Parallel() - g := NewWithT(t) - // Create a fake HPA with status.desiredReplicas set - hpa := &autoscalingv2.HorizontalPodAutoscaler{ - ObjectMeta: metav1.ObjectMeta{ - Name: "gw-nginx", - Namespace: "default", + tests := []struct { + currentReplicas *int32 + configReplicas *int32 + name string + description string + expectedValue int32 + hpaExists bool + deploymentExists bool + expectedNil bool + }{ + { + name: "HPA exists - use current deployment replicas", + hpaExists: true, + deploymentExists: true, + currentReplicas: helpers.GetPointer(int32(8)), + configReplicas: helpers.GetPointer(int32(5)), + expectedNil: false, + expectedValue: 8, + description: "When HPA exists, read current deployment replicas (set by HPA)", }, - Status: autoscalingv2.HorizontalPodAutoscalerStatus{ - DesiredReplicas: 7, + { + name: "HPA does not exist - use configured replicas", + hpaExists: false, + deploymentExists: false, + configReplicas: helpers.GetPointer(int32(3)), + expectedNil: false, + expectedValue: 3, + description: "When HPA doesn't exist yet (initial creation), use configured replicas", }, - } - - agentTLSSecret := &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: agentTLSTestSecretName, - Namespace: ngfNamespace, + { + name: "HPA enabled but doesn't exist, no configured replicas", + hpaExists: false, + deploymentExists: false, + configReplicas: nil, + expectedNil: true, + description: "When HPA enabled but doesn't exist and no replicas configured, don't set replicas", }, - Data: map[string][]byte{"tls.crt": []byte("tls")}, } - fakeClient := fake.NewFakeClient(agentTLSSecret, hpa) + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) - provisioner := &NginxProvisioner{ - cfg: Config{ - GatewayPodConfig: &config.GatewayPodConfig{ - Namespace: ngfNamespace, - Version: "1.0.0", - Image: "ngf-image", - }, - AgentTLSSecretName: agentTLSTestSecretName, - }, - baseLabelSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{"app": "nginx"}, - }, - k8sClient: fakeClient, - } + agentTLSSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: agentTLSTestSecretName, + Namespace: ngfNamespace, + }, + Data: map[string][]byte{"tls.crt": []byte("tls")}, + } - gateway := &gatewayv1.Gateway{ - ObjectMeta: metav1.ObjectMeta{ - Name: "gw", - Namespace: "default", - }, - Spec: gatewayv1.GatewaySpec{ - Listeners: []gatewayv1.Listener{{Port: 80}}, - }, - } + var fakeClient client.Client + switch { + case tc.hpaExists && tc.deploymentExists: + // Create a fake HPA and existing deployment + hpa := &autoscalingv2.HorizontalPodAutoscaler{ + ObjectMeta: metav1.ObjectMeta{ + Name: "gw-nginx", + Namespace: "default", + }, + Status: autoscalingv2.HorizontalPodAutoscalerStatus{ + DesiredReplicas: 7, + }, + } + existingDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "gw-nginx", + Namespace: "default", + }, + Spec: appsv1.DeploymentSpec{ + Replicas: tc.currentReplicas, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "nginx"}, + }, + }, + } + fakeClient = fake.NewFakeClient(agentTLSSecret, hpa, existingDeployment) + case tc.hpaExists: + hpa := &autoscalingv2.HorizontalPodAutoscaler{ + ObjectMeta: metav1.ObjectMeta{ + Name: "gw-nginx", + Namespace: "default", + }, + Status: autoscalingv2.HorizontalPodAutoscalerStatus{ + DesiredReplicas: 7, + }, + } + fakeClient = fake.NewFakeClient(agentTLSSecret, hpa) + default: + fakeClient = fake.NewFakeClient(agentTLSSecret) + } - resourceName := "gw-nginx" - nProxyCfg := &graph.EffectiveNginxProxy{ - Kubernetes: &ngfAPIv1alpha2.KubernetesSpec{ - Deployment: &ngfAPIv1alpha2.DeploymentSpec{ - Replicas: nil, // Should be overridden by HPA - Autoscaling: &ngfAPIv1alpha2.AutoscalingSpec{Enable: true}, - }, - }, - } + provisioner := &NginxProvisioner{ + cfg: Config{ + GatewayPodConfig: &config.GatewayPodConfig{ + Namespace: ngfNamespace, + Version: "1.0.0", + Image: "ngf-image", + }, + AgentTLSSecretName: agentTLSTestSecretName, + }, + baseLabelSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "nginx"}, + }, + k8sClient: fakeClient, + } - objects, err := provisioner.buildNginxResourceObjects(resourceName, gateway, nProxyCfg) - g.Expect(err).ToNot(HaveOccurred()) + gateway := &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{ + Name: "gw", + Namespace: "default", + }, + Spec: gatewayv1.GatewaySpec{ + Listeners: []gatewayv1.Listener{{Port: 80}}, + }, + } - // Find the deployment object - var deployment *appsv1.Deployment - for _, obj := range objects { - if d, ok := obj.(*appsv1.Deployment); ok { - deployment = d - break - } + resourceName := "gw-nginx" + nProxyCfg := &graph.EffectiveNginxProxy{ + Kubernetes: &ngfAPIv1alpha2.KubernetesSpec{ + Deployment: &ngfAPIv1alpha2.DeploymentSpec{ + Replicas: tc.configReplicas, + Autoscaling: &ngfAPIv1alpha2.AutoscalingSpec{Enable: true}, + }, + }, + } + + objects, err := provisioner.buildNginxResourceObjects(resourceName, gateway, nProxyCfg) + g.Expect(err).ToNot(HaveOccurred()) + + // Find the deployment object + var deployment *appsv1.Deployment + for _, obj := range objects { + if d, ok := obj.(*appsv1.Deployment); ok { + deployment = d + break + } + } + g.Expect(deployment).ToNot(BeNil()) + + if tc.expectedNil { + g.Expect(deployment.Spec.Replicas).To(BeNil(), tc.description) + } else { + g.Expect(deployment.Spec.Replicas).ToNot(BeNil(), tc.description) + g.Expect(*deployment.Spec.Replicas).To(Equal(tc.expectedValue), tc.description) + } + }) } - g.Expect(deployment).ToNot(BeNil()) - g.Expect(deployment.Spec.Replicas).ToNot(BeNil()) - g.Expect(*deployment.Spec.Replicas).To(Equal(int32(7))) } func TestBuildNginxResourceObjects_Plus(t *testing.T) {