From 74cef94530b3a52b73feef6141ff046cae9c2bf0 Mon Sep 17 00:00:00 2001 From: "Per G. da Silva" Date: Tue, 21 Oct 2025 15:02:14 -0700 Subject: [PATCH 1/8] initial thoughts Signed-off-by: Per G. da Silva --- api/v1/clusterextensionrevision_types.go | 6 +++-- .../clusterextensionrevision_controller.go | 24 ++++++++++++++++++- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/api/v1/clusterextensionrevision_types.go b/api/v1/clusterextensionrevision_types.go index 13ac4ce2a..4fd8cb961 100644 --- a/api/v1/clusterextensionrevision_types.go +++ b/api/v1/clusterextensionrevision_types.go @@ -26,8 +26,9 @@ const ( ClusterExtensionRevisionKind = "ClusterExtensionRevision" // Condition Types - ClusterExtensionRevisionTypeAvailable = "Available" - ClusterExtensionRevisionTypeSucceeded = "Succeeded" + ClusterExtensionRevisionTypeAvailable = "Available" + ClusterExtensionRevisionTypeSucceeded = "Succeeded" + ClusterExtensionRevisionTypeProgressing = "Progressing" // Condition Reasons ClusterExtensionRevisionReasonAvailable = "Available" @@ -40,6 +41,7 @@ const ( ClusterExtensionRevisionReasonIncomplete = "Incomplete" ClusterExtensionRevisionReasonProgressing = "Progressing" ClusterExtensionRevisionReasonArchived = "Archived" + ClusterExtensionRevisionReasonRolloutInProgress = "RolloutInProgress" ) // ClusterExtensionRevisionSpec defines the desired state of ClusterExtensionRevision. diff --git a/internal/operator-controller/controllers/clusterextensionrevision_controller.go b/internal/operator-controller/controllers/clusterextensionrevision_controller.go index 888249161..3dfcbe5d9 100644 --- a/internal/operator-controller/controllers/clusterextensionrevision_controller.go +++ b/internal/operator-controller/controllers/clusterextensionrevision_controller.go @@ -165,6 +165,9 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev if verr := rres.GetValidationError(); verr != nil { l.Info("preflight error, retrying after 10s", "err", verr.String()) + // if we take Availability as strictly being "all availability probes pass" + // we should probably be at an Unknown state (or not posted it at all here) + // and post this up in the Progressing condition as False with a failed rollout reason / message meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ Type: ocv1.ClusterExtensionRevisionTypeAvailable, Status: metav1.ConditionFalse, @@ -179,6 +182,8 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev if verr := pres.GetValidationError(); verr != nil { l.Info("preflight error, retrying after 10s", "err", verr.String()) + // we probably want to either eat this error and leave Progressing as True with a rolling out reason and implement timeout + // or surface this error through a Degraded-type condition that can flap as roll out continues meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ Type: ocv1.ClusterExtensionRevisionTypeAvailable, Status: metav1.ConditionFalse, @@ -198,7 +203,8 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev if len(collidingObjs) > 0 { l.Info("object collision error, retrying after 10s", "collisions", collidingObjs) - + // collisions are probably stickier than phase roll out probe failures - so we'd probably want to set + // Progressing to false here due to the collision meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ Type: ocv1.ClusterExtensionRevisionTypeAvailable, Status: metav1.ConditionFalse, @@ -206,6 +212,8 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev Message: fmt.Sprintf("revision object collisions in phase %d\n%s", i, strings.Join(collidingObjs, "\n\n")), ObservedGeneration: rev.Generation, }) + + // idk if we want to return yet or check the Availability probes on a best-effort basis return ctrl.Result{RequeueAfter: 10 * time.Second}, nil } } @@ -224,6 +232,9 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev } // Report status. + // We probably want to set Progressing to False here with a rollout success reason and message + // It would be good to understand from Nico how we can distinguish between progression and availability probes + // and how to best check that all Availability probes are passing meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ Type: ocv1.ClusterExtensionRevisionTypeAvailable, Status: metav1.ConditionTrue, @@ -231,6 +242,10 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev Message: "Object is available and passes all probes.", ObservedGeneration: rev.Generation, }) + + // We'll probably only want to remove this once we are done updating the ClusterExtension conditions + // as its one of the interfaces between the revision and the extension. If we still have the Succeeded for now + // that's fine. if !meta.IsStatusConditionTrue(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) { meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ Type: ocv1.ClusterExtensionRevisionTypeSucceeded, @@ -247,6 +262,8 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev continue } for _, ores := range pres.GetObjects() { + // we probably want an AvailabilityProbeType and run through all of them independently of whether + // the revision is complete or not pr := ores.Probes()[boxcutter.ProgressProbeType] if pr.Success { continue @@ -254,6 +271,8 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev obj := ores.Object() gvk := obj.GetObjectKind().GroupVersionKind() + // I think these can be pretty large and verbose. We may want to + // work a little on the formatting...? probeFailureMsgs = append(probeFailureMsgs, fmt.Sprintf( "Object %s.%s %s/%s: %v", gvk.Kind, gvk.GroupVersion().String(), @@ -271,6 +290,7 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev ObservedGeneration: rev.Generation, }) } else { + // either to nothing or set the Progressing condition to True with rolling out reason / message meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ Type: ocv1.ClusterExtensionRevisionTypeAvailable, Status: metav1.ConditionFalse, @@ -281,6 +301,7 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev } } if rres.InTransistion() { + // seems to be the right thing XD meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ Type: ocv1.TypeProgressing, Status: metav1.ConditionTrue, @@ -289,6 +310,7 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev ObservedGeneration: rev.Generation, }) } else { + // we may not want to remove it but rather set it to False? meta.RemoveStatusCondition(&rev.Status.Conditions, ocv1.TypeProgressing) } From 3aaef43c6fa4388ff56798841bedf5e38e4046b8 Mon Sep 17 00:00:00 2001 From: Predrag Knezevic Date: Wed, 22 Oct 2025 17:08:21 +0200 Subject: [PATCH 2/8] WIP Introduce Progressing condition --- api/v1/clusterextensionrevision_types.go | 5 +- ...ramework.io_clusterextensionrevisions.yaml | 3 + .../clusterextensionrevision_controller.go | 179 ++++++++++-------- manifests/experimental-e2e.yaml | 3 + manifests/experimental.yaml | 3 + 5 files changed, 117 insertions(+), 76 deletions(-) diff --git a/api/v1/clusterextensionrevision_types.go b/api/v1/clusterextensionrevision_types.go index 4fd8cb961..ff04e687a 100644 --- a/api/v1/clusterextensionrevision_types.go +++ b/api/v1/clusterextensionrevision_types.go @@ -41,7 +41,9 @@ const ( ClusterExtensionRevisionReasonIncomplete = "Incomplete" ClusterExtensionRevisionReasonProgressing = "Progressing" ClusterExtensionRevisionReasonArchived = "Archived" - ClusterExtensionRevisionReasonRolloutInProgress = "RolloutInProgress" + ClusterExtensionRevisionReasonRolloutInProgress = "RollingOut" + ClusterExtensionRevisionReasonRolloutError = "RolloutError" + ClusterExtensionRevisionReasonRolledOut = "RolledOut" ) // ClusterExtensionRevisionSpec defines the desired state of ClusterExtensionRevision. @@ -152,6 +154,7 @@ type ClusterExtensionRevisionStatus struct { // ClusterExtensionRevision is the Schema for the clusterextensionrevisions API // +kubebuilder:printcolumn:name="Available",type=string,JSONPath=`.status.conditions[?(@.type=='Available')].status` +// +kubebuilder:printcolumn:name="Progressing",type=string,JSONPath=`.status.conditions[?(@.type=='Progressing')].status` // +kubebuilder:printcolumn:name=Age,type=date,JSONPath=`.metadata.creationTimestamp` type ClusterExtensionRevision struct { metav1.TypeMeta `json:",inline"` diff --git a/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensionrevisions.yaml b/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensionrevisions.yaml index 5004c8c6f..d4cab64d1 100644 --- a/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensionrevisions.yaml +++ b/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensionrevisions.yaml @@ -19,6 +19,9 @@ spec: - jsonPath: .status.conditions[?(@.type=='Available')].status name: Available type: string + - jsonPath: .status.conditions[?(@.type=='Progressing')].status + name: Progressing + type: string - jsonPath: .metadata.creationTimestamp name: Age type: date diff --git a/internal/operator-controller/controllers/clusterextensionrevision_controller.go b/internal/operator-controller/controllers/clusterextensionrevision_controller.go index 3dfcbe5d9..0b18325ba 100644 --- a/internal/operator-controller/controllers/clusterextensionrevision_controller.go +++ b/internal/operator-controller/controllers/clusterextensionrevision_controller.go @@ -126,36 +126,51 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev // Reconcile // if err := c.ensureFinalizer(ctx, rev, clusterExtensionRevisionTeardownFinalizer); err != nil { - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeAvailable, - Status: metav1.ConditionFalse, - Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure, - Message: err.Error(), - ObservedGeneration: rev.Generation, - }) return ctrl.Result{}, fmt.Errorf("error ensuring teardown finalizer: %v", err) } - if err := c.establishWatch(ctx, rev, revision); err != nil { + inRollout := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) == nil + if inRollout { + if err := c.establishWatch(ctx, rev, revision); err != nil { + werr := fmt.Errorf("establish watch: %v", err) + meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ + Type: ocv1.ClusterExtensionRevisionTypeProgressing, + Status: metav1.ConditionTrue, + Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure, + Message: werr.Error(), + ObservedGeneration: rev.Generation, + }) + return ctrl.Result{}, werr + } meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeAvailable, - Status: metav1.ConditionFalse, - Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure, - Message: err.Error(), + Type: ocv1.ClusterExtensionRevisionTypeProgressing, + Status: metav1.ConditionTrue, + Reason: ocv1.ClusterExtensionRevisionReasonRolloutInProgress, + Message: "Revision is being rolled out.", ObservedGeneration: rev.Generation, }) - return ctrl.Result{}, fmt.Errorf("establish watch: %v", err) } rres, err := c.RevisionEngine.Reconcile(ctx, *revision, opts...) if err != nil { - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeAvailable, - Status: metav1.ConditionFalse, - Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure, - Message: err.Error(), - ObservedGeneration: rev.Generation, - }) + if inRollout { + meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ + Type: ocv1.ClusterExtensionRevisionTypeProgressing, + Status: metav1.ConditionTrue, + Reason: ocv1.ClusterExtensionRevisionReasonRolloutError, + Message: err.Error(), + ObservedGeneration: rev.Generation, + }) + } else { + // it is a probably transient error, and we do not know if the revision is available or not + meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ + Type: ocv1.ClusterExtensionRevisionTypeAvailable, + Status: metav1.ConditionUnknown, + Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure, + Message: err.Error(), + ObservedGeneration: rev.Generation, + }) + } return ctrl.Result{}, fmt.Errorf("revision reconcile: %v", err) } l.Info("reconcile report", "report", rres.String()) @@ -165,16 +180,25 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev if verr := rres.GetValidationError(); verr != nil { l.Info("preflight error, retrying after 10s", "err", verr.String()) - // if we take Availability as strictly being "all availability probes pass" - // we should probably be at an Unknown state (or not posted it at all here) - // and post this up in the Progressing condition as False with a failed rollout reason / message - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeAvailable, - Status: metav1.ConditionFalse, - Reason: ocv1.ClusterExtensionRevisionReasonRevisionValidationFailure, - Message: fmt.Sprintf("revision validation error: %s", verr), - ObservedGeneration: rev.Generation, - }) + if inRollout { + // given that we retry, we are not going to keep Progressing condition True + meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ + Type: ocv1.ClusterExtensionRevisionTypeProgressing, + Status: metav1.ConditionTrue, + Reason: ocv1.ClusterExtensionRevisionReasonRevisionValidationFailure, + Message: fmt.Sprintf("revision validation error: %s", verr), + ObservedGeneration: rev.Generation, + }) + } else { + // it is a probably transient error, and we do not know if the revision is available or not + meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ + Type: ocv1.ClusterExtensionRevisionTypeAvailable, + Status: metav1.ConditionUnknown, + Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure, + Message: fmt.Sprintf("revision validation error: %s", verr), + ObservedGeneration: rev.Generation, + }) + } return ctrl.Result{RequeueAfter: 10 * time.Second}, nil } @@ -182,15 +206,25 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev if verr := pres.GetValidationError(); verr != nil { l.Info("preflight error, retrying after 10s", "err", verr.String()) - // we probably want to either eat this error and leave Progressing as True with a rolling out reason and implement timeout - // or surface this error through a Degraded-type condition that can flap as roll out continues - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeAvailable, - Status: metav1.ConditionFalse, - Reason: ocv1.ClusterExtensionRevisionReasonPhaseValidationError, - Message: fmt.Sprintf("phase %d validation error: %s", i, verr), - ObservedGeneration: rev.Generation, - }) + if inRollout { + // given that we retry, we are not going to keep Progressing condition True + meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ + Type: ocv1.ClusterExtensionRevisionTypeProgressing, + Status: metav1.ConditionTrue, + Reason: ocv1.ClusterExtensionRevisionReasonPhaseValidationError, + Message: fmt.Sprintf("phase %d validation error: %s", i, verr), + ObservedGeneration: rev.Generation, + }) + } else { + // it is a probably transient error, and we do not know if the revision is available or not + meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ + Type: ocv1.ClusterExtensionRevisionTypeAvailable, + Status: metav1.ConditionUnknown, + Reason: ocv1.ClusterExtensionRevisionReasonPhaseValidationError, + Message: fmt.Sprintf("phase %d validation error: %s", i, verr), + ObservedGeneration: rev.Generation, + }) + } return ctrl.Result{RequeueAfter: 10 * time.Second}, nil } @@ -205,19 +239,32 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev l.Info("object collision error, retrying after 10s", "collisions", collidingObjs) // collisions are probably stickier than phase roll out probe failures - so we'd probably want to set // Progressing to false here due to the collision - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeAvailable, - Status: metav1.ConditionFalse, - Reason: ocv1.ClusterExtensionRevisionReasonObjectCollisions, - Message: fmt.Sprintf("revision object collisions in phase %d\n%s", i, strings.Join(collidingObjs, "\n\n")), - ObservedGeneration: rev.Generation, - }) - - // idk if we want to return yet or check the Availability probes on a best-effort basis - return ctrl.Result{RequeueAfter: 10 * time.Second}, nil + if inRollout { + meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ + Type: ocv1.ClusterExtensionRevisionTypeProgressing, + Status: metav1.ConditionFalse, + Reason: ocv1.ClusterExtensionRevisionReasonObjectCollisions, + Message: fmt.Sprintf("revision object collisions in phase %d\n%s", i, strings.Join(collidingObjs, "\n\n")), + ObservedGeneration: rev.Generation, + }) + + // not sure if we want to retry here - collisions are probably not transient? + return ctrl.Result{RequeueAfter: 10 * time.Second}, nil + } } } + if !rres.InTransistion() { + // we have rolled out all objects in all phases, not interested in probes here + meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ + Type: ocv1.ClusterExtensionRevisionTypeProgressing, + Status: metav1.ConditionFalse, + Reason: ocv1.ClusterExtensionRevisionReasonRolledOut, + Message: "Revision is rolled out.", + ObservedGeneration: rev.Generation, + }) + } + //nolint:nestif if rres.IsComplete() { // Archive other revisions. @@ -231,30 +278,26 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev } } - // Report status. - // We probably want to set Progressing to False here with a rollout success reason and message // It would be good to understand from Nico how we can distinguish between progression and availability probes // and how to best check that all Availability probes are passing meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ Type: ocv1.ClusterExtensionRevisionTypeAvailable, Status: metav1.ConditionTrue, Reason: ocv1.ClusterExtensionRevisionReasonAvailable, - Message: "Object is available and passes all probes.", + Message: "Objects are available and passes all probes.", ObservedGeneration: rev.Generation, }) // We'll probably only want to remove this once we are done updating the ClusterExtension conditions // as its one of the interfaces between the revision and the extension. If we still have the Succeeded for now // that's fine. - if !meta.IsStatusConditionTrue(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) { - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeSucceeded, - Status: metav1.ConditionTrue, - Reason: ocv1.ClusterExtensionRevisionReasonRolloutSuccess, - Message: "Revision succeeded rolling out.", - ObservedGeneration: rev.Generation, - }) - } + meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ + Type: ocv1.ClusterExtensionRevisionTypeSucceeded, + Status: metav1.ConditionTrue, + Reason: ocv1.ClusterExtensionRevisionReasonRolloutSuccess, + Message: "Revision succeeded rolling out.", + ObservedGeneration: rev.Generation, + }) } else { var probeFailureMsgs []string for _, pres := range rres.GetPhases() { @@ -290,7 +333,6 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev ObservedGeneration: rev.Generation, }) } else { - // either to nothing or set the Progressing condition to True with rolling out reason / message meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ Type: ocv1.ClusterExtensionRevisionTypeAvailable, Status: metav1.ConditionFalse, @@ -300,19 +342,6 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev }) } } - if rres.InTransistion() { - // seems to be the right thing XD - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.TypeProgressing, - Status: metav1.ConditionTrue, - Reason: ocv1.ClusterExtensionRevisionReasonProgressing, - Message: "Rollout in progress.", - ObservedGeneration: rev.Generation, - }) - } else { - // we may not want to remove it but rather set it to False? - meta.RemoveStatusCondition(&rev.Status.Conditions, ocv1.TypeProgressing) - } return ctrl.Result{}, nil } diff --git a/manifests/experimental-e2e.yaml b/manifests/experimental-e2e.yaml index 5b09dc949..034eaa381 100644 --- a/manifests/experimental-e2e.yaml +++ b/manifests/experimental-e2e.yaml @@ -610,6 +610,9 @@ spec: - jsonPath: .status.conditions[?(@.type=='Available')].status name: Available type: string + - jsonPath: .status.conditions[?(@.type=='Progressing')].status + name: Progressing + type: string - jsonPath: .metadata.creationTimestamp name: Age type: date diff --git a/manifests/experimental.yaml b/manifests/experimental.yaml index 4ddd45077..3b33f1074 100644 --- a/manifests/experimental.yaml +++ b/manifests/experimental.yaml @@ -575,6 +575,9 @@ spec: - jsonPath: .status.conditions[?(@.type=='Available')].status name: Available type: string + - jsonPath: .status.conditions[?(@.type=='Progressing')].status + name: Progressing + type: string - jsonPath: .metadata.creationTimestamp name: Age type: date From 9e5d754e39155c266185c0ab860db7c78c980990 Mon Sep 17 00:00:00 2001 From: Predrag Knezevic Date: Thu, 23 Oct 2025 15:43:07 +0200 Subject: [PATCH 3/8] Add ClusterExtensionRevision MarkAs(Progressing|NotProgressing|Available|Unavailable) helper methods for improved code readability --- api/v1/clusterextensionrevision_types.go | 42 +++++++++ .../clusterextensionrevision_controller.go | 91 ++++--------------- 2 files changed, 60 insertions(+), 73 deletions(-) diff --git a/api/v1/clusterextensionrevision_types.go b/api/v1/clusterextensionrevision_types.go index ff04e687a..f1583160b 100644 --- a/api/v1/clusterextensionrevision_types.go +++ b/api/v1/clusterextensionrevision_types.go @@ -17,6 +17,7 @@ limitations under the License. package v1 import ( + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/types" @@ -38,6 +39,7 @@ const ( ClusterExtensionRevisionReasonObjectCollisions = "ObjectCollisions" ClusterExtensionRevisionReasonRolloutSuccess = "RolloutSuccess" ClusterExtensionRevisionReasonProbeFailure = "ProbeFailure" + ClusterExtensionRevisionReasonProbesSucceeded = "ProbesSucceeded" ClusterExtensionRevisionReasonIncomplete = "Incomplete" ClusterExtensionRevisionReasonProgressing = "Progressing" ClusterExtensionRevisionReasonArchived = "Archived" @@ -169,6 +171,46 @@ type ClusterExtensionRevision struct { Status ClusterExtensionRevisionStatus `json:"status,omitempty"` } +func (cer *ClusterExtensionRevision) MarkAsProgressing(reason, message string) { + meta.SetStatusCondition(&cer.Status.Conditions, metav1.Condition{ + Type: ClusterExtensionRevisionTypeProgressing, + Status: metav1.ConditionTrue, + Reason: reason, + Message: message, + ObservedGeneration: cer.Generation, + }) +} + +func (cer *ClusterExtensionRevision) MarkAsNotProgressing(reason, message string) { + meta.SetStatusCondition(&cer.Status.Conditions, metav1.Condition{ + Type: ClusterExtensionRevisionTypeProgressing, + Status: metav1.ConditionFalse, + Reason: reason, + Message: message, + ObservedGeneration: cer.Generation, + }) +} + +func (cer *ClusterExtensionRevision) MarkAsAvailable(reason, message string) { + meta.SetStatusCondition(&cer.Status.Conditions, metav1.Condition{ + Type: ClusterExtensionRevisionTypeAvailable, + Status: metav1.ConditionTrue, + Reason: reason, + Message: message, + ObservedGeneration: cer.Generation, + }) +} + +func (cer *ClusterExtensionRevision) MarkAsUnavailable(reason, message string) { + meta.SetStatusCondition(&cer.Status.Conditions, metav1.Condition{ + Type: ClusterExtensionRevisionTypeAvailable, + Status: metav1.ConditionFalse, + Reason: reason, + Message: message, + ObservedGeneration: cer.Generation, + }) +} + // +kubebuilder:object:root=true // ClusterExtensionRevisionList contains a list of ClusterExtensionRevision diff --git a/internal/operator-controller/controllers/clusterextensionrevision_controller.go b/internal/operator-controller/controllers/clusterextensionrevision_controller.go index 0b18325ba..3450c35cc 100644 --- a/internal/operator-controller/controllers/clusterextensionrevision_controller.go +++ b/internal/operator-controller/controllers/clusterextensionrevision_controller.go @@ -129,40 +129,25 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev return ctrl.Result{}, fmt.Errorf("error ensuring teardown finalizer: %v", err) } + // If the Available condition is not present, we are still rolling out the objects inRollout := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) == nil if inRollout { if err := c.establishWatch(ctx, rev, revision); err != nil { werr := fmt.Errorf("establish watch: %v", err) - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeProgressing, - Status: metav1.ConditionTrue, - Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure, - Message: werr.Error(), - ObservedGeneration: rev.Generation, - }) + // this error is very likely transient, so we should keep revision as progressing + rev.MarkAsProgressing(ocv1.ClusterExtensionRevisionReasonReconcileFailure, werr.Error()) return ctrl.Result{}, werr } - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeProgressing, - Status: metav1.ConditionTrue, - Reason: ocv1.ClusterExtensionRevisionReasonRolloutInProgress, - Message: "Revision is being rolled out.", - ObservedGeneration: rev.Generation, - }) + rev.MarkAsProgressing(ocv1.ClusterExtensionRevisionReasonRolloutInProgress, "Revision is being rolled out.") } rres, err := c.RevisionEngine.Reconcile(ctx, *revision, opts...) if err != nil { if inRollout { - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeProgressing, - Status: metav1.ConditionTrue, - Reason: ocv1.ClusterExtensionRevisionReasonRolloutError, - Message: err.Error(), - ObservedGeneration: rev.Generation, - }) + rev.MarkAsProgressing(ocv1.ClusterExtensionRevisionReasonRolloutError, err.Error()) } else { // it is a probably transient error, and we do not know if the revision is available or not + // perhaps we should not report it at all, hoping that it is going to be mitigated in the next reconcile? meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ Type: ocv1.ClusterExtensionRevisionTypeAvailable, Status: metav1.ConditionUnknown, @@ -181,16 +166,11 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev l.Info("preflight error, retrying after 10s", "err", verr.String()) if inRollout { - // given that we retry, we are not going to keep Progressing condition True - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeProgressing, - Status: metav1.ConditionTrue, - Reason: ocv1.ClusterExtensionRevisionReasonRevisionValidationFailure, - Message: fmt.Sprintf("revision validation error: %s", verr), - ObservedGeneration: rev.Generation, - }) + // given that we retry, we are going to keep Progressing condition True + rev.MarkAsProgressing(ocv1.ClusterExtensionRevisionReasonRevisionValidationFailure, fmt.Sprintf("revision validation error: %s", verr)) } else { // it is a probably transient error, and we do not know if the revision is available or not + // perhaps we should not report it at all, hoping that it is going to be mitigated in the next reconcile? meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ Type: ocv1.ClusterExtensionRevisionTypeAvailable, Status: metav1.ConditionUnknown, @@ -207,16 +187,11 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev l.Info("preflight error, retrying after 10s", "err", verr.String()) if inRollout { - // given that we retry, we are not going to keep Progressing condition True - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeProgressing, - Status: metav1.ConditionTrue, - Reason: ocv1.ClusterExtensionRevisionReasonPhaseValidationError, - Message: fmt.Sprintf("phase %d validation error: %s", i, verr), - ObservedGeneration: rev.Generation, - }) + // given that we retry, we are going to keep Progressing condition True + rev.MarkAsProgressing(ocv1.ClusterExtensionRevisionReasonPhaseValidationError, fmt.Sprintf("phase %d validation error: %s", i, verr)) } else { // it is a probably transient error, and we do not know if the revision is available or not + // perhaps we should not report it at all, hoping that it is going to be mitigated in the next reconcile? meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ Type: ocv1.ClusterExtensionRevisionTypeAvailable, Status: metav1.ConditionUnknown, @@ -240,15 +215,9 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev // collisions are probably stickier than phase roll out probe failures - so we'd probably want to set // Progressing to false here due to the collision if inRollout { - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeProgressing, - Status: metav1.ConditionFalse, - Reason: ocv1.ClusterExtensionRevisionReasonObjectCollisions, - Message: fmt.Sprintf("revision object collisions in phase %d\n%s", i, strings.Join(collidingObjs, "\n\n")), - ObservedGeneration: rev.Generation, - }) + rev.MarkAsNotProgressing(ocv1.ClusterExtensionRevisionReasonObjectCollisions, fmt.Sprintf("revision object collisions in phase %d\n%s", i, strings.Join(collidingObjs, "\n\n"))) - // not sure if we want to retry here - collisions are probably not transient? + // NOTE(pedjak): not sure if we want to retry here - collisions are probably not transient? return ctrl.Result{RequeueAfter: 10 * time.Second}, nil } } @@ -256,13 +225,7 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev if !rres.InTransistion() { // we have rolled out all objects in all phases, not interested in probes here - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeProgressing, - Status: metav1.ConditionFalse, - Reason: ocv1.ClusterExtensionRevisionReasonRolledOut, - Message: "Revision is rolled out.", - ObservedGeneration: rev.Generation, - }) + rev.MarkAsNotProgressing(ocv1.ClusterExtensionRevisionReasonRolledOut, "Revision is rolled out.") } //nolint:nestif @@ -280,13 +243,7 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev // It would be good to understand from Nico how we can distinguish between progression and availability probes // and how to best check that all Availability probes are passing - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeAvailable, - Status: metav1.ConditionTrue, - Reason: ocv1.ClusterExtensionRevisionReasonAvailable, - Message: "Objects are available and passes all probes.", - ObservedGeneration: rev.Generation, - }) + rev.MarkAsAvailable(ocv1.ClusterExtensionRevisionReasonProbesSucceeded, "Objects are available and pass all probes.") // We'll probably only want to remove this once we are done updating the ClusterExtension conditions // as its one of the interfaces between the revision and the extension. If we still have the Succeeded for now @@ -325,21 +282,9 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev } } if len(probeFailureMsgs) > 0 { - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeAvailable, - Status: metav1.ConditionFalse, - Reason: ocv1.ClusterExtensionRevisionReasonProbeFailure, - Message: strings.Join(probeFailureMsgs, "\n"), - ObservedGeneration: rev.Generation, - }) + rev.MarkAsUnavailable(ocv1.ClusterExtensionRevisionReasonProbeFailure, strings.Join(probeFailureMsgs, "\n")) } else { - meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ - Type: ocv1.ClusterExtensionRevisionTypeAvailable, - Status: metav1.ConditionFalse, - Reason: ocv1.ClusterExtensionRevisionReasonIncomplete, - Message: "Revision has not been rolled out completely.", - ObservedGeneration: rev.Generation, - }) + rev.MarkAsUnavailable(ocv1.ClusterExtensionRevisionReasonIncomplete, "Revision has not been rolled out completely.") } } From 3ceb4c75516b05a8a2a328c35a064beb6b9ae8a8 Mon Sep 17 00:00:00 2001 From: Predrag Knezevic Date: Fri, 24 Oct 2025 14:12:54 +0200 Subject: [PATCH 4/8] Add ability to play with startup/readines/liveness probes in test-operator Start busybox's httpd and serves probes from created files --- .../v1.0.0/manifests/script.configmap.yaml | 11 ++ .../testoperator.clusterserviceversion.yaml | 38 ++++- .../v1.0.2/manifests/bundle.configmap.yaml | 11 ++ .../olm.operatorframework.com_olme2etest.yaml | 27 ++++ .../testoperator.clusterserviceversion.yaml | 151 ++++++++++++++++++ .../manifests/testoperator.networkpolicy.yaml | 8 + .../v1.0.2/metadata/annotations.yaml | 10 ++ .../v1.2.0/manifests/script.configmap.yaml | 12 ++ .../testoperator.clusterserviceversion.yaml | 38 ++++- .../test-catalog/v1/configs/catalog.yaml | 11 ++ 10 files changed, 303 insertions(+), 14 deletions(-) create mode 100644 testdata/images/bundles/test-operator/v1.0.0/manifests/script.configmap.yaml create mode 100644 testdata/images/bundles/test-operator/v1.0.2/manifests/bundle.configmap.yaml create mode 100644 testdata/images/bundles/test-operator/v1.0.2/manifests/olm.operatorframework.com_olme2etest.yaml create mode 100644 testdata/images/bundles/test-operator/v1.0.2/manifests/testoperator.clusterserviceversion.yaml create mode 100644 testdata/images/bundles/test-operator/v1.0.2/manifests/testoperator.networkpolicy.yaml create mode 100644 testdata/images/bundles/test-operator/v1.0.2/metadata/annotations.yaml create mode 100644 testdata/images/bundles/test-operator/v1.2.0/manifests/script.configmap.yaml diff --git a/testdata/images/bundles/test-operator/v1.0.0/manifests/script.configmap.yaml b/testdata/images/bundles/test-operator/v1.0.0/manifests/script.configmap.yaml new file mode 100644 index 000000000..a0404835a --- /dev/null +++ b/testdata/images/bundles/test-operator/v1.0.0/manifests/script.configmap.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: httpd-script +data: + httpd.sh: | + #!/bin/sh + echo true > /var/www/started + echo true > /var/www/ready + echo true > /var/www/live + exec httpd -f -h /var/www -p 80 diff --git a/testdata/images/bundles/test-operator/v1.0.0/manifests/testoperator.clusterserviceversion.yaml b/testdata/images/bundles/test-operator/v1.0.0/manifests/testoperator.clusterserviceversion.yaml index 4cb49d5fe..bc6cd77d2 100644 --- a/testdata/images/bundles/test-operator/v1.0.0/manifests/testoperator.clusterserviceversion.yaml +++ b/testdata/images/bundles/test-operator/v1.0.0/manifests/testoperator.clusterserviceversion.yaml @@ -56,15 +56,39 @@ spec: app: olme2etest spec: terminationGracePeriodSeconds: 0 + volumes: + - name: scripts + configMap: + name: httpd-script + defaultMode: 0755 containers: - - name: busybox + - name: busybox-httpd-container image: busybox:1.36 - command: - - 'sleep' - - '1000' - securityContext: - runAsUser: 1000 - runAsNonRoot: true + command: ["/scripts/httpd.sh"] + ports: + - containerPort: 80 + volumeMounts: + - name: scripts + mountPath: /scripts + readOnly: true + startupProbe: + httpGet: + path: /started + port: 80 + failureThreshold: 30 + periodSeconds: 10 + livenessProbe: + httpGet: + path: /live + port: 80 + failureThreshold: 1 + periodSeconds: 2 + readinessProbe: + httpGet: + path: /ready + port: 80 + initialDelaySeconds: 1 + periodSeconds: 1 serviceAccountName: simple-bundle-manager clusterPermissions: - rules: diff --git a/testdata/images/bundles/test-operator/v1.0.2/manifests/bundle.configmap.yaml b/testdata/images/bundles/test-operator/v1.0.2/manifests/bundle.configmap.yaml new file mode 100644 index 000000000..0279603bf --- /dev/null +++ b/testdata/images/bundles/test-operator/v1.0.2/manifests/bundle.configmap.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: test-configmap + annotations: + shouldNotTemplate: > + The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles. + +data: + version: "v1.0.2" + name: "test-configmap" diff --git a/testdata/images/bundles/test-operator/v1.0.2/manifests/olm.operatorframework.com_olme2etest.yaml b/testdata/images/bundles/test-operator/v1.0.2/manifests/olm.operatorframework.com_olme2etest.yaml new file mode 100644 index 000000000..44e64cef7 --- /dev/null +++ b/testdata/images/bundles/test-operator/v1.0.2/manifests/olm.operatorframework.com_olme2etest.yaml @@ -0,0 +1,27 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: olme2etests.olm.operatorframework.io +spec: + group: olm.operatorframework.io + names: + kind: OLME2ETest + listKind: OLME2ETestList + plural: olme2etests + singular: olme2etest + scope: Cluster + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object + properties: + spec: + type: object + properties: + testField: + type: string diff --git a/testdata/images/bundles/test-operator/v1.0.2/manifests/testoperator.clusterserviceversion.yaml b/testdata/images/bundles/test-operator/v1.0.2/manifests/testoperator.clusterserviceversion.yaml new file mode 100644 index 000000000..f4a44f6b7 --- /dev/null +++ b/testdata/images/bundles/test-operator/v1.0.2/manifests/testoperator.clusterserviceversion.yaml @@ -0,0 +1,151 @@ +apiVersion: operators.coreos.com/v1alpha1 +kind: ClusterServiceVersion +metadata: + annotations: + alm-examples: |- + [ + { + "apiVersion": "olme2etests.olm.operatorframework.io/v1", + "kind": "OLME2ETests", + "metadata": { + "labels": { + "app.kubernetes.io/managed-by": "kustomize", + "app.kubernetes.io/name": "test" + }, + "name": "test-sample" + }, + "spec": null + } + ] + capabilities: Basic Install + createdAt: "2024-10-24T19:21:40Z" + operators.operatorframework.io/builder: operator-sdk-v1.34.1 + operators.operatorframework.io/project_layout: go.kubebuilder.io/v4 + name: testoperator.v1.0.2 + namespace: placeholder +spec: + apiservicedefinitions: {} + customresourcedefinitions: + owned: + - description: Configures subsections of Alertmanager configuration specific to each namespace + displayName: OLME2ETest + kind: OLME2ETest + name: olme2etests.olm.operatorframework.io + version: v1 + description: OLM E2E Testing Operator with a wrong image ref + displayName: test-operator + icon: + - base64data: "" + mediatype: "" + install: + spec: + deployments: + - label: + app.kubernetes.io/component: controller + app.kubernetes.io/name: test-operator + app.kubernetes.io/version: 1.0.2 + name: test-operator + spec: + replicas: 1 + selector: + matchLabels: + app: olme2etest + template: + metadata: + labels: + app: olme2etest + spec: + terminationGracePeriodSeconds: 0 + volumes: + - name: scripts + configMap: + name: httpd-script + defaultMode: 0755 + containers: + - name: busybox-httpd-container + # This image ref is wrong and should trigger ImagePullBackOff condition + image: wrong/image + serviceAccountName: simple-bundle-manager + clusterPermissions: + - rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create + serviceAccountName: simple-bundle-manager + permissions: + - rules: + - apiGroups: + - "" + resources: + - configmaps + - serviceaccounts + verbs: + - get + - list + - watch + - create + - update + - patch + - delete + - apiGroups: + - networking.k8s.io + resources: + - networkpolicies + verbs: + - get + - list + - create + - update + - delete + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + serviceAccountName: simple-bundle-manager + strategy: deployment + installModes: + - supported: false + type: OwnNamespace + - supported: true + type: SingleNamespace + - supported: false + type: MultiNamespace + - supported: true + type: AllNamespaces + keywords: + - registry + links: + - name: simple-bundle + url: https://simple-bundle.domain + maintainers: + - email: main#simple-bundle.domain + name: Simple Bundle + maturity: beta + provider: + name: Simple Bundle + url: https://simple-bundle.domain + version: 1.0.2 diff --git a/testdata/images/bundles/test-operator/v1.0.2/manifests/testoperator.networkpolicy.yaml b/testdata/images/bundles/test-operator/v1.0.2/manifests/testoperator.networkpolicy.yaml new file mode 100644 index 000000000..20a5ea834 --- /dev/null +++ b/testdata/images/bundles/test-operator/v1.0.2/manifests/testoperator.networkpolicy.yaml @@ -0,0 +1,8 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: test-operator-network-policy +spec: + podSelector: {} + policyTypes: + - Ingress diff --git a/testdata/images/bundles/test-operator/v1.0.2/metadata/annotations.yaml b/testdata/images/bundles/test-operator/v1.0.2/metadata/annotations.yaml new file mode 100644 index 000000000..404f0f4a3 --- /dev/null +++ b/testdata/images/bundles/test-operator/v1.0.2/metadata/annotations.yaml @@ -0,0 +1,10 @@ +annotations: + # Core bundle annotations. + operators.operatorframework.io.bundle.mediatype.v1: registry+v1 + operators.operatorframework.io.bundle.manifests.v1: manifests/ + operators.operatorframework.io.bundle.metadata.v1: metadata/ + operators.operatorframework.io.bundle.package.v1: test + operators.operatorframework.io.bundle.channels.v1: beta + operators.operatorframework.io.metrics.builder: operator-sdk-v1.28.0 + operators.operatorframework.io.metrics.mediatype.v1: metrics+v1 + operators.operatorframework.io.metrics.project_layout: unknown diff --git a/testdata/images/bundles/test-operator/v1.2.0/manifests/script.configmap.yaml b/testdata/images/bundles/test-operator/v1.2.0/manifests/script.configmap.yaml new file mode 100644 index 000000000..6270e4ad7 --- /dev/null +++ b/testdata/images/bundles/test-operator/v1.2.0/manifests/script.configmap.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: httpd-script +data: + httpd.sh: | + #!/bin/sh + echo "Version 1.2.0" + echo true > /var/www/started + echo true > /var/www/ready + echo true > /var/www/live + exec httpd -f -h /var/www -p 80 diff --git a/testdata/images/bundles/test-operator/v1.2.0/manifests/testoperator.clusterserviceversion.yaml b/testdata/images/bundles/test-operator/v1.2.0/manifests/testoperator.clusterserviceversion.yaml index 90621bc6d..9cb819615 100644 --- a/testdata/images/bundles/test-operator/v1.2.0/manifests/testoperator.clusterserviceversion.yaml +++ b/testdata/images/bundles/test-operator/v1.2.0/manifests/testoperator.clusterserviceversion.yaml @@ -56,15 +56,39 @@ spec: app: olme2etest spec: terminationGracePeriodSeconds: 0 + volumes: + - name: scripts + configMap: + name: httpd-script + defaultMode: 0755 containers: - - name: busybox + - name: busybox-httpd-container image: busybox:1.37 - command: - - 'sleep' - - '1000' - securityContext: - runAsUser: 1000 - runAsNonRoot: true + command: ["/scripts/httpd.sh"] + ports: + - containerPort: 80 + volumeMounts: + - name: scripts + mountPath: /scripts + readOnly: true + startupProbe: + httpGet: + path: /started + port: 80 + failureThreshold: 30 + periodSeconds: 10 + livenessProbe: + httpGet: + path: /live + port: 80 + failureThreshold: 1 + periodSeconds: 2 + readinessProbe: + httpGet: + path: /ready + port: 80 + initialDelaySeconds: 1 + periodSeconds: 1 serviceAccountName: simple-bundle-manager clusterPermissions: - rules: diff --git a/testdata/images/catalogs/test-catalog/v1/configs/catalog.yaml b/testdata/images/catalogs/test-catalog/v1/configs/catalog.yaml index 437175a4e..49340a2f9 100644 --- a/testdata/images/catalogs/test-catalog/v1/configs/catalog.yaml +++ b/testdata/images/catalogs/test-catalog/v1/configs/catalog.yaml @@ -7,6 +7,7 @@ name: alpha package: test entries: - name: test-operator.1.0.0 + - name: test-operator.1.0.2 --- schema: olm.channel name: beta @@ -39,6 +40,16 @@ properties: version: 1.0.1 --- schema: olm.bundle +name: test-operator.1.0.2 +package: test +image: docker-registry.operator-controller-e2e.svc.cluster.local:5000/bundles/registry-v1/test-operator:v1.0.2 +properties: + - type: olm.package + value: + packageName: test + version: 1.0.2 +--- +schema: olm.bundle name: test-operator.1.2.0 package: test image: docker-registry.operator-controller-e2e.svc.cluster.local:5000/bundles/registry-v1/test-operator:v1.2.0 From 1622df1cb938d3c41816ce1b5350deadfe7a254c Mon Sep 17 00:00:00 2001 From: Predrag Knezevic Date: Mon, 27 Oct 2025 16:51:57 +0100 Subject: [PATCH 5/8] propagate conditions from CER to cluster extension * added `.status.activeRevisions` for tracking * `Progressing` is mirrored from the counterpart of the latest revision * `Available` is mirrored from the counterpart of the latest installed revision * If the latest revision is rolled out, but not yet available, its `Available` condition is mirrored under its `activeRevision` entry. --- api/v1/clusterextension_types.go | 16 +++- api/v1/zz_generated.deepcopy.go | 29 +++++++ ...peratorframework.io_clusterextensions.yaml | 76 ++++++++++++++++++- ...peratorframework.io_clusterextensions.yaml | 4 +- .../operator-controller/applier/boxcutter.go | 2 - .../clusterextension_controller.go | 65 ++++++++++------ .../clusterextensionrevision_controller.go | 8 +- manifests/experimental-e2e.yaml | 76 ++++++++++++++++++- manifests/experimental.yaml | 76 ++++++++++++++++++- manifests/standard-e2e.yaml | 4 +- manifests/standard.yaml | 4 +- 11 files changed, 317 insertions(+), 43 deletions(-) diff --git a/api/v1/clusterextension_types.go b/api/v1/clusterextension_types.go index 343e8cbb4..be08d217a 100644 --- a/api/v1/clusterextension_types.go +++ b/api/v1/clusterextension_types.go @@ -469,6 +469,14 @@ type BundleMetadata struct { Version string `json:"version"` } +type RevisionStatus struct { + Name string `json:"name"` + // +listType=map + // +listMapKey=type + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + // ClusterExtensionStatus defines the observed state of a ClusterExtension. type ClusterExtensionStatus struct { // The set of condition types which apply to all spec.source variations are Installed and Progressing. @@ -498,6 +506,12 @@ type ClusterExtensionStatus struct { // // +optional Install *ClusterExtensionInstallStatus `json:"install,omitempty"` + + // +listType=map + // +listMapKey=name + // +optional + // + ActiveRevisions []RevisionStatus `json:"activeRevisions,omitempty"` } // ClusterExtensionInstallStatus is a representation of the status of the identified bundle. @@ -516,7 +530,7 @@ type ClusterExtensionInstallStatus struct { // +kubebuilder:subresource:status // +kubebuilder:printcolumn:name="Installed Bundle",type=string,JSONPath=`.status.install.bundle.name` // +kubebuilder:printcolumn:name=Version,type=string,JSONPath=`.status.install.bundle.version` -// +kubebuilder:printcolumn:name="Installed",type=string,JSONPath=`.status.conditions[?(@.type=='Installed')].status` +// +kubebuilder:printcolumn:name="Available",type=string,JSONPath=`.status.conditions[?(@.type=='Available')].status` // +kubebuilder:printcolumn:name="Progressing",type=string,JSONPath=`.status.conditions[?(@.type=='Progressing')].status` // +kubebuilder:printcolumn:name=Age,type=date,JSONPath=`.metadata.creationTimestamp` diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index e13f1532b..b8069d783 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -542,6 +542,13 @@ func (in *ClusterExtensionStatus) DeepCopyInto(out *ClusterExtensionStatus) { *out = new(ClusterExtensionInstallStatus) **out = **in } + if in.ActiveRevisions != nil { + in, out := &in.ActiveRevisions, &out.ActiveRevisions + *out = make([]RevisionStatus, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterExtensionStatus. @@ -629,6 +636,28 @@ func (in *ResolvedImageSource) DeepCopy() *ResolvedImageSource { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RevisionStatus) DeepCopyInto(out *RevisionStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]metav1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RevisionStatus. +func (in *RevisionStatus) DeepCopy() *RevisionStatus { + if in == nil { + return nil + } + out := new(RevisionStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ServiceAccountReference) DeepCopyInto(out *ServiceAccountReference) { *out = *in diff --git a/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensions.yaml b/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensions.yaml index 1038b7fdf..33f7b1a4b 100644 --- a/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensions.yaml +++ b/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensions.yaml @@ -22,8 +22,8 @@ spec: - jsonPath: .status.install.bundle.version name: Version type: string - - jsonPath: .status.conditions[?(@.type=='Installed')].status - name: Installed + - jsonPath: .status.conditions[?(@.type=='Available')].status + name: Available type: string - jsonPath: .status.conditions[?(@.type=='Progressing')].status name: Progressing @@ -505,6 +505,78 @@ spec: description: status is an optional field that defines the observed state of the ClusterExtension. properties: + activeRevisions: + items: + properties: + conditions: + items: + description: Condition contains details for one aspect of + the current state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, + Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + name: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map conditions: description: |- The set of condition types which apply to all spec.source variations are Installed and Progressing. diff --git a/helm/olmv1/base/operator-controller/crd/standard/olm.operatorframework.io_clusterextensions.yaml b/helm/olmv1/base/operator-controller/crd/standard/olm.operatorframework.io_clusterextensions.yaml index 61337bad6..96ada10dd 100644 --- a/helm/olmv1/base/operator-controller/crd/standard/olm.operatorframework.io_clusterextensions.yaml +++ b/helm/olmv1/base/operator-controller/crd/standard/olm.operatorframework.io_clusterextensions.yaml @@ -22,8 +22,8 @@ spec: - jsonPath: .status.install.bundle.version name: Version type: string - - jsonPath: .status.conditions[?(@.type=='Installed')].status - name: Installed + - jsonPath: .status.conditions[?(@.type=='Available')].status + name: Available type: string - jsonPath: .status.conditions[?(@.type=='Progressing')].status name: Progressing diff --git a/internal/operator-controller/applier/boxcutter.go b/internal/operator-controller/applier/boxcutter.go index fa3f85e79..5212e36e9 100644 --- a/internal/operator-controller/applier/boxcutter.go +++ b/internal/operator-controller/applier/boxcutter.go @@ -317,8 +317,6 @@ func (bc *Boxcutter) apply(ctx context.Context, contentFS fs.FS, ext *ocv1.Clust return false, "New revision created", nil } else if progressingCondition != nil && progressingCondition.Status == metav1.ConditionTrue { return false, progressingCondition.Message, nil - } else if availableCondition != nil && availableCondition.Status != metav1.ConditionTrue { - return false, "", errors.New(availableCondition.Message) } else if succeededCondition != nil && succeededCondition.Status != metav1.ConditionTrue { return false, succeededCondition.Message, nil } diff --git a/internal/operator-controller/controllers/clusterextension_controller.go b/internal/operator-controller/controllers/clusterextension_controller.go index 7bcedde65..03ff7c6e7 100644 --- a/internal/operator-controller/controllers/clusterextension_controller.go +++ b/internal/operator-controller/controllers/clusterextension_controller.go @@ -297,32 +297,43 @@ func (r *ClusterExtensionReconciler) reconcile(ctx context.Context, ext *ocv1.Cl // to ensure exponential backoff can occur: // - Permission errors (it is not possible to watch changes to permissions. // The only way to eventually recover from permission errors is to keep retrying). - rolloutSucceeded, rolloutStatus, err := r.Applier.Apply(ctx, imageFS, ext, objLbls, storeLbls) - - // Set installed status - if rolloutSucceeded { - revisionStates = &RevisionStates{Installed: resolvedRevisionMetadata} - } else if err == nil && revisionStates.Installed == nil && len(revisionStates.RollingOut) == 0 { - revisionStates = &RevisionStates{RollingOut: []*RevisionMetadata{resolvedRevisionMetadata}} - } - setInstalledStatusFromRevisionStates(ext, revisionStates) - - // If there was an error applying the resolved bundle, - // report the error via the Progressing condition. - if err != nil { + if _, _, err := r.Applier.Apply(ctx, imageFS, ext, objLbls, storeLbls); err != nil { + // If there was an error applying the resolved bundle, + // report the error via the Progressing condition. setStatusProgressing(ext, wrapErrorWithResolutionInfo(resolvedRevisionMetadata.BundleMetadata, err)) return ctrl.Result{}, err - } else if !rolloutSucceeded { - apimeta.SetStatusCondition(&ext.Status.Conditions, metav1.Condition{ - Type: ocv1.TypeProgressing, - Status: metav1.ConditionTrue, - Reason: ocv1.ReasonRolloutInProgress, - Message: rolloutStatus, - ObservedGeneration: ext.GetGeneration(), - }) - } else { - setStatusProgressing(ext, nil) } + + // Mirror Available/Progressing conditions from the installed revision + if i := revisionStates.Installed; i != nil { + for _, cndType := range []string{ocv1.ClusterExtensionRevisionTypeAvailable, ocv1.ClusterExtensionRevisionTypeProgressing} { + cnd := *apimeta.FindStatusCondition(i.Conditions, cndType) + apimeta.SetStatusCondition(&ext.Status.Conditions, cnd) + } + ext.Status.Install = &ocv1.ClusterExtensionInstallStatus{ + Bundle: i.BundleMetadata, + } + ext.Status.ActiveRevisions = []ocv1.RevisionStatus{{Name: i.RevName}} + } + for idx, r := range revisionStates.RollingOut { + rs := ocv1.RevisionStatus{Name: r.RevName} + // Mirror Progressing condition from the latest active revision + if idx == len(revisionStates.RollingOut)-1 { + pcnd := apimeta.FindStatusCondition(r.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing) + if pcnd != nil { + apimeta.SetStatusCondition(&ext.Status.Conditions, *pcnd) + } + if acnd := apimeta.FindStatusCondition(r.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable); pcnd.Status == metav1.ConditionFalse && acnd != nil && acnd.Status != metav1.ConditionTrue { + apimeta.SetStatusCondition(&rs.Conditions, *acnd) + } + } + if len(ext.Status.ActiveRevisions) == 0 { + ext.Status.ActiveRevisions = []ocv1.RevisionStatus{rs} + } else { + ext.Status.ActiveRevisions = append(ext.Status.ActiveRevisions, rs) + } + } + return ctrl.Result{}, nil } @@ -474,9 +485,11 @@ func clusterExtensionRequestsForCatalog(c client.Reader, logger logr.Logger) crh } type RevisionMetadata struct { + RevName string Package string Image string ocv1.BundleMetadata + Conditions []metav1.Condition } type RevisionStates struct { @@ -553,8 +566,10 @@ func (d *BoxcutterRevisionStatesGetter) GetRevisionStates(ctx context.Context, e // is fairly decoupled from this code where we get the annotations back out. We may want to co-locate // the set/get logic a bit better to make it more maintainable and less likely to get out of sync. rm := &RevisionMetadata{ - Package: rev.Labels[labels.PackageNameKey], - Image: rev.Annotations[labels.BundleReferenceKey], + RevName: rev.Name, + Package: rev.Labels[labels.PackageNameKey], + Image: rev.Annotations[labels.BundleReferenceKey], + Conditions: rev.Status.Conditions, BundleMetadata: ocv1.BundleMetadata{ Name: rev.Annotations[labels.BundleNameKey], Version: rev.Annotations[labels.BundleVersionKey], diff --git a/internal/operator-controller/controllers/clusterextensionrevision_controller.go b/internal/operator-controller/controllers/clusterextensionrevision_controller.go index 3450c35cc..386fbe8f2 100644 --- a/internal/operator-controller/controllers/clusterextensionrevision_controller.go +++ b/internal/operator-controller/controllers/clusterextensionrevision_controller.go @@ -10,6 +10,7 @@ import ( "strings" "time" + "github.com/operator-framework/operator-controller/internal/operator-controller/labels" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions" @@ -122,6 +123,7 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev return c.teardown(ctx, rev, revision) } + revVersion := rev.GetAnnotations()[labels.BundleVersionKey] // // Reconcile // @@ -138,7 +140,7 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev rev.MarkAsProgressing(ocv1.ClusterExtensionRevisionReasonReconcileFailure, werr.Error()) return ctrl.Result{}, werr } - rev.MarkAsProgressing(ocv1.ClusterExtensionRevisionReasonRolloutInProgress, "Revision is being rolled out.") + rev.MarkAsProgressing(ocv1.ClusterExtensionRevisionReasonRolloutInProgress, fmt.Sprintf("Revision %s is being rolled out.", revVersion)) } rres, err := c.RevisionEngine.Reconcile(ctx, *revision, opts...) @@ -225,7 +227,7 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev if !rres.InTransistion() { // we have rolled out all objects in all phases, not interested in probes here - rev.MarkAsNotProgressing(ocv1.ClusterExtensionRevisionReasonRolledOut, "Revision is rolled out.") + rev.MarkAsNotProgressing(ocv1.ClusterExtensionRevisionReasonRolledOut, fmt.Sprintf("Revision %s is rolled out.", revVersion)) } //nolint:nestif @@ -284,7 +286,7 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev if len(probeFailureMsgs) > 0 { rev.MarkAsUnavailable(ocv1.ClusterExtensionRevisionReasonProbeFailure, strings.Join(probeFailureMsgs, "\n")) } else { - rev.MarkAsUnavailable(ocv1.ClusterExtensionRevisionReasonIncomplete, "Revision has not been rolled out completely.") + rev.MarkAsUnavailable(ocv1.ClusterExtensionRevisionReasonIncomplete, fmt.Sprintf("Revision %s has not been rolled out completely.", revVersion)) } } diff --git a/manifests/experimental-e2e.yaml b/manifests/experimental-e2e.yaml index 034eaa381..6c2b525ed 100644 --- a/manifests/experimental-e2e.yaml +++ b/manifests/experimental-e2e.yaml @@ -830,8 +830,8 @@ spec: - jsonPath: .status.install.bundle.version name: Version type: string - - jsonPath: .status.conditions[?(@.type=='Installed')].status - name: Installed + - jsonPath: .status.conditions[?(@.type=='Available')].status + name: Available type: string - jsonPath: .status.conditions[?(@.type=='Progressing')].status name: Progressing @@ -1313,6 +1313,78 @@ spec: description: status is an optional field that defines the observed state of the ClusterExtension. properties: + activeRevisions: + items: + properties: + conditions: + items: + description: Condition contains details for one aspect of + the current state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, + Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + name: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map conditions: description: |- The set of condition types which apply to all spec.source variations are Installed and Progressing. diff --git a/manifests/experimental.yaml b/manifests/experimental.yaml index 3b33f1074..f35abe9b1 100644 --- a/manifests/experimental.yaml +++ b/manifests/experimental.yaml @@ -795,8 +795,8 @@ spec: - jsonPath: .status.install.bundle.version name: Version type: string - - jsonPath: .status.conditions[?(@.type=='Installed')].status - name: Installed + - jsonPath: .status.conditions[?(@.type=='Available')].status + name: Available type: string - jsonPath: .status.conditions[?(@.type=='Progressing')].status name: Progressing @@ -1278,6 +1278,78 @@ spec: description: status is an optional field that defines the observed state of the ClusterExtension. properties: + activeRevisions: + items: + properties: + conditions: + items: + description: Condition contains details for one aspect of + the current state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, + Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + name: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map conditions: description: |- The set of condition types which apply to all spec.source variations are Installed and Progressing. diff --git a/manifests/standard-e2e.yaml b/manifests/standard-e2e.yaml index 9c7a20f57..077c394f8 100644 --- a/manifests/standard-e2e.yaml +++ b/manifests/standard-e2e.yaml @@ -613,8 +613,8 @@ spec: - jsonPath: .status.install.bundle.version name: Version type: string - - jsonPath: .status.conditions[?(@.type=='Installed')].status - name: Installed + - jsonPath: .status.conditions[?(@.type=='Available')].status + name: Available type: string - jsonPath: .status.conditions[?(@.type=='Progressing')].status name: Progressing diff --git a/manifests/standard.yaml b/manifests/standard.yaml index cf7eb0ee5..289612e09 100644 --- a/manifests/standard.yaml +++ b/manifests/standard.yaml @@ -578,8 +578,8 @@ spec: - jsonPath: .status.install.bundle.version name: Version type: string - - jsonPath: .status.conditions[?(@.type=='Installed')].status - name: Installed + - jsonPath: .status.conditions[?(@.type=='Available')].status + name: Available type: string - jsonPath: .status.conditions[?(@.type=='Progressing')].status name: Progressing From 928e47a4fdcb4d1839a203a0ff26c77ee7e39c83 Mon Sep 17 00:00:00 2001 From: "Per G. da Silva" Date: Tue, 28 Oct 2025 16:15:19 -0700 Subject: [PATCH 6/8] Update ClusterExtensionRevision reconciler unit tests Signed-off-by: Per G. da Silva --- .../clusterextensionrevision_controller.go | 2 +- ...lusterextensionrevision_controller_test.go | 33 ++++++++++++------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/internal/operator-controller/controllers/clusterextensionrevision_controller.go b/internal/operator-controller/controllers/clusterextensionrevision_controller.go index 386fbe8f2..0ead0dc03 100644 --- a/internal/operator-controller/controllers/clusterextensionrevision_controller.go +++ b/internal/operator-controller/controllers/clusterextensionrevision_controller.go @@ -10,7 +10,6 @@ import ( "strings" "time" - "github.com/operator-framework/operator-controller/internal/operator-controller/labels" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions" @@ -35,6 +34,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" ocv1 "github.com/operator-framework/operator-controller/api/v1" + "github.com/operator-framework/operator-controller/internal/operator-controller/labels" ) const ( diff --git a/internal/operator-controller/controllers/clusterextensionrevision_controller_test.go b/internal/operator-controller/controllers/clusterextensionrevision_controller_test.go index 873a6cc74..6e2475a73 100644 --- a/internal/operator-controller/controllers/clusterextensionrevision_controller_test.go +++ b/internal/operator-controller/controllers/clusterextensionrevision_controller_test.go @@ -29,6 +29,7 @@ import ( ocv1 "github.com/operator-framework/operator-controller/api/v1" "github.com/operator-framework/operator-controller/internal/operator-controller/controllers" + "github.com/operator-framework/operator-controller/internal/operator-controller/labels" ) func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionProgression(t *testing.T) { @@ -81,7 +82,7 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionProgression(t *te require.NotNil(t, cond) require.Equal(t, metav1.ConditionFalse, cond.Status) require.Equal(t, ocv1.ClusterExtensionRevisionReasonIncomplete, cond.Reason) - require.Equal(t, "Revision has not been rolled out completely.", cond.Message) + require.Equal(t, "Revision 1.0.0 has not been rolled out completely.", cond.Message) require.Equal(t, int64(1), cond.ObservedGeneration) }, }, @@ -175,7 +176,7 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionProgression(t *te }, }, { - name: "set Progressing:True:Progressing condition while revision is transitioning", + name: "set Progressing:True:RollingOut condition while revision is transitioning", revisionResult: mockRevisionResult{ inTransition: true, }, @@ -194,13 +195,13 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionProgression(t *te cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.TypeProgressing) require.NotNil(t, cond) require.Equal(t, metav1.ConditionTrue, cond.Status) - require.Equal(t, ocv1.ClusterExtensionRevisionReasonProgressing, cond.Reason) - require.Equal(t, "Rollout in progress.", cond.Message) + require.Equal(t, ocv1.ClusterExtensionRevisionReasonRolloutInProgress, cond.Reason) + require.Equal(t, "Revision 1.0.0 is being rolled out.", cond.Message) require.Equal(t, int64(1), cond.ObservedGeneration) }, }, { - name: "remove Progressing condition once transition rollout is finished", + name: "set Progressing:False:RolledOut once transition rollout is finished", revisionResult: mockRevisionResult{ inTransition: false, }, @@ -211,8 +212,8 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionProgression(t *te meta.SetStatusCondition(&rev1.Status.Conditions, metav1.Condition{ Type: ocv1.TypeProgressing, Status: metav1.ConditionTrue, - Reason: ocv1.ClusterExtensionRevisionReasonProgressing, - Message: "some message", + Reason: ocv1.ClusterExtensionRevisionReasonRolledOut, + Message: "Revision 1.0.0 is rolled out.", ObservedGeneration: 1, }) return []client.Object{ext, rev1} @@ -224,11 +225,15 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionProgression(t *te }, rev) require.NoError(t, err) cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.TypeProgressing) - require.Nil(t, cond) + require.NotNil(t, cond) + require.Equal(t, metav1.ConditionFalse, cond.Status) + require.Equal(t, ocv1.ClusterExtensionRevisionReasonRolledOut, cond.Reason) + require.Equal(t, "Revision 1.0.0 is rolled out.", cond.Message) + require.Equal(t, int64(1), cond.ObservedGeneration) }, }, { - name: "set Available:True:Available and Succeeded:True:RolloutSuccess conditions on successful revision rollout", + name: "set Available:True:ProbesSucceeded and Succeeded:True:RolloutSuccess conditions on successful revision rollout", revisionResult: mockRevisionResult{ isComplete: true, }, @@ -247,8 +252,8 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionProgression(t *te cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) require.NotNil(t, cond) require.Equal(t, metav1.ConditionTrue, cond.Status) - require.Equal(t, ocv1.ClusterExtensionRevisionReasonAvailable, cond.Reason) - require.Equal(t, "Object is available and passes all probes.", cond.Message) + require.Equal(t, ocv1.ClusterExtensionRevisionReasonProbesSucceeded, cond.Reason) + require.Equal(t, "Objects are available and pass all probes.", cond.Message) require.Equal(t, int64(1), cond.ObservedGeneration) cond = meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) @@ -709,6 +714,12 @@ func newTestClusterExtensionRevision(name string) *ocv1.ClusterExtensionRevision Name: name, UID: types.UID(name), Generation: int64(1), + Annotations: map[string]string{ + labels.PackageNameKey: "some-package", + labels.BundleNameKey: "some-package.v1.0.0", + labels.BundleReferenceKey: "registry.io/some-repo/some-package:v1.0.0", + labels.BundleVersionKey: "1.0.0", + }, }, Spec: ocv1.ClusterExtensionRevisionSpec{ Phases: []ocv1.ClusterExtensionRevisionPhase{ From 85117221496dd34548efce9ec685d48760b308ac Mon Sep 17 00:00:00 2001 From: "Per G. da Silva" Date: Tue, 28 Oct 2025 16:18:51 -0700 Subject: [PATCH 7/8] Update docs Signed-off-by: Per G. da Silva --- docs/api-reference/olmv1-api-reference.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/api-reference/olmv1-api-reference.md b/docs/api-reference/olmv1-api-reference.md index c0da40c4b..4315286c8 100644 --- a/docs/api-reference/olmv1-api-reference.md +++ b/docs/api-reference/olmv1-api-reference.md @@ -361,6 +361,7 @@ _Appears in:_ | --- | --- | --- | --- | | `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.31/#condition-v1-meta) array_ | The set of condition types which apply to all spec.source variations are Installed and Progressing.

The Installed condition represents whether or not the bundle has been installed for this ClusterExtension.
When Installed is True and the Reason is Succeeded, the bundle has been successfully installed.
When Installed is False and the Reason is Failed, the bundle has failed to install.

The Progressing condition represents whether or not the ClusterExtension is advancing towards a new state.
When Progressing is True and the Reason is Succeeded, the ClusterExtension is making progress towards a new state.
When Progressing is True and the Reason is Retrying, the ClusterExtension has encountered an error that could be resolved on subsequent reconciliation attempts.
When Progressing is False and the Reason is Blocked, the ClusterExtension has encountered an error that requires manual intervention for recovery.

When the ClusterExtension is sourced from a catalog, if may also communicate a deprecation condition.
These are indications from a package owner to guide users away from a particular package, channel, or bundle.
BundleDeprecated is set if the requested bundle version is marked deprecated in the catalog.
ChannelDeprecated is set if the requested channel is marked deprecated in the catalog.
PackageDeprecated is set if the requested package is marked deprecated in the catalog.
Deprecated is a rollup condition that is present when any of the deprecated conditions are present. | | | | `install` _[ClusterExtensionInstallStatus](#clusterextensioninstallstatus)_ | install is a representation of the current installation status for this ClusterExtension. | | | +| `activeRevisions` _[RevisionStatus](#revisionstatus) array_ | | | | @@ -436,6 +437,23 @@ _Appears in:_ | `ref` _string_ | ref contains the resolved image digest-based reference.
The digest format is used so users can use other tooling to fetch the exact
OCI manifests that were used to extract the catalog contents. | | MaxLength: 1000
Required: \{\}
| +#### RevisionStatus + + + + + + + +_Appears in:_ +- [ClusterExtensionStatus](#clusterextensionstatus) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `name` _string_ | | | | +| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.31/#condition-v1-meta) array_ | | | | + + #### ServiceAccountReference From 7e9a60106dbc3c08be0c3eeab5dc65e1d18e4d01 Mon Sep 17 00:00:00 2001 From: Predrag Knezevic Date: Wed, 5 Nov 2025 11:48:28 +0100 Subject: [PATCH 8/8] Split `reconcile` function into step-based oriented approach The logic present in private `reconcile` function of `ClusterExtensionReconciler` is refactored into step-oriented approach for increased modularity, so that Helm and Boxcutter based approaches can be wired, implemented, and tested in an easier way. --- cmd/operator-controller/main.go | 16 + .../controllers/boxcutter_reconcile_steps.go | 158 +++++++++ .../clusterextension_controller.go | 266 ++-------------- .../clusterextension_controller_test.go | 300 ++++++++++-------- .../clusterextension_reconcile_steps.go | 211 ++++++++++++ .../controllers/suite_test.go | 18 +- 6 files changed, 597 insertions(+), 372 deletions(-) create mode 100644 internal/operator-controller/controllers/boxcutter_reconcile_steps.go create mode 100644 internal/operator-controller/controllers/clusterextension_reconcile_steps.go diff --git a/cmd/operator-controller/main.go b/cmd/operator-controller/main.go index fba7c39af..8d693b584 100644 --- a/cmd/operator-controller/main.go +++ b/cmd/operator-controller/main.go @@ -571,6 +571,14 @@ func setupBoxcutter( ActionClientGetter: acg, RevisionGenerator: rg, } + ceReconciler.ReconcileSteps = []controllers.ReconcileStepFunc{ + controllers.HandleFinalizers(ceReconciler.Finalizers), + controllers.MigrateStorage(ceReconciler.StorageMigrator), + controllers.RetrieveRevisionStates(ceReconciler.RevisionStatesGetter), + controllers.RetrieveRevisionMetadata(ceReconciler.Resolver), + controllers.UnpackBundle(ceReconciler.ImagePuller, ceReconciler.ImageCache), + controllers.ApplyBundleWithBoxcutter(ceReconciler.Applier), + } discoveryClient, err := discovery.NewDiscoveryClientForConfig(mgr.GetConfig()) if err != nil { @@ -679,6 +687,14 @@ func setupHelm( Manager: cm, } ceReconciler.RevisionStatesGetter = &controllers.HelmRevisionStatesGetter{ActionClientGetter: acg} + ceReconciler.ReconcileSteps = []controllers.ReconcileStepFunc{ + controllers.HandleFinalizers(ceReconciler.Finalizers), + controllers.RetrieveRevisionStates(ceReconciler.RevisionStatesGetter), + controllers.RetrieveRevisionMetadata(ceReconciler.Resolver), + controllers.UnpackBundle(ceReconciler.ImagePuller, ceReconciler.ImageCache), + controllers.ApplyBundle(ceReconciler.Applier), + } + return nil } diff --git a/internal/operator-controller/controllers/boxcutter_reconcile_steps.go b/internal/operator-controller/controllers/boxcutter_reconcile_steps.go new file mode 100644 index 000000000..258f4e9f0 --- /dev/null +++ b/internal/operator-controller/controllers/boxcutter_reconcile_steps.go @@ -0,0 +1,158 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "cmp" + "context" + "fmt" + "io/fs" + "slices" + + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + ocv1 "github.com/operator-framework/operator-controller/api/v1" + "github.com/operator-framework/operator-controller/internal/operator-controller/labels" +) + +type BoxcutterRevisionStatesGetter struct { + Reader client.Reader +} + +func (d *BoxcutterRevisionStatesGetter) GetRevisionStates(ctx context.Context, ext *ocv1.ClusterExtension) (*RevisionStates, error) { + // TODO: boxcutter applier has a nearly identical bit of code for listing and sorting revisions + // only difference here is that it sorts in reverse order to start iterating with the most + // recent revisions. We should consolidate to avoid code duplication. + existingRevisionList := &ocv1.ClusterExtensionRevisionList{} + if err := d.Reader.List(ctx, existingRevisionList, client.MatchingLabels{ + ClusterExtensionRevisionOwnerLabel: ext.Name, + }); err != nil { + return nil, fmt.Errorf("listing revisions: %w", err) + } + slices.SortFunc(existingRevisionList.Items, func(a, b ocv1.ClusterExtensionRevision) int { + return cmp.Compare(a.Spec.Revision, b.Spec.Revision) + }) + + rs := &RevisionStates{} + for _, rev := range existingRevisionList.Items { + switch rev.Spec.LifecycleState { + case ocv1.ClusterExtensionRevisionLifecycleStateActive, + ocv1.ClusterExtensionRevisionLifecycleStatePaused: + default: + // Skip anything not active or paused, which should only be "Archived". + continue + } + + // TODO: the setting of these annotations (happens in boxcutter applier when we pass in "storageLabels") + // is fairly decoupled from this code where we get the annotations back out. We may want to co-locate + // the set/get logic a bit better to make it more maintainable and less likely to get out of sync. + rm := &RevisionMetadata{ + RevName: rev.Name, + Package: rev.Labels[labels.PackageNameKey], + Image: rev.Annotations[labels.BundleReferenceKey], + Conditions: rev.Status.Conditions, + BundleMetadata: ocv1.BundleMetadata{ + Name: rev.Annotations[labels.BundleNameKey], + Version: rev.Annotations[labels.BundleVersionKey], + }, + } + + if apimeta.IsStatusConditionTrue(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) { + rs.Installed = rm + } else { + rs.RollingOut = append(rs.RollingOut, rm) + } + } + + return rs, nil +} + +func MigrateStorage(m StorageMigrator) ReconcileStepFunc { + return func(ctx context.Context, ext *ocv1.ClusterExtension) (context.Context, *ctrl.Result, error) { + objLbls := map[string]string{ + labels.OwnerKindKey: ocv1.ClusterExtensionKind, + labels.OwnerNameKey: ext.GetName(), + } + + if err := m.Migrate(ctx, ext, objLbls); err != nil { + return ctx, nil, fmt.Errorf("migrating storage: %w", err) + } + return ctx, nil, nil + } +} + +func ApplyBundleWithBoxcutter(a Applier) ReconcileStepFunc { + return func(ctx context.Context, ext *ocv1.ClusterExtension) (context.Context, *ctrl.Result, error) { + l := log.FromContext(ctx) + resolvedRevisionMetadata := ctx.Value(resolvedRevisionMetadataKey{}).(*RevisionMetadata) + imageFS := ctx.Value(imageFSKey{}).(fs.FS) + revisionStates := ctx.Value(revisionStatesKey{}).(*RevisionStates) + storeLbls := map[string]string{ + labels.BundleNameKey: resolvedRevisionMetadata.Name, + labels.PackageNameKey: resolvedRevisionMetadata.Package, + labels.BundleVersionKey: resolvedRevisionMetadata.Version, + labels.BundleReferenceKey: resolvedRevisionMetadata.Image, + } + objLbls := map[string]string{ + labels.OwnerKindKey: ocv1.ClusterExtensionKind, + labels.OwnerNameKey: ext.GetName(), + } + + l.Info("applying bundle contents") + if _, _, err := a.Apply(ctx, imageFS, ext, objLbls, storeLbls); err != nil { + // If there was an error applying the resolved bundle, + // report the error via the Progressing condition. + setStatusProgressing(ext, wrapErrorWithResolutionInfo(resolvedRevisionMetadata.BundleMetadata, err)) + return ctx, nil, err + } + + // Mirror Available/Progressing conditions from the installed revision + if i := revisionStates.Installed; i != nil { + for _, cndType := range []string{ocv1.ClusterExtensionRevisionTypeAvailable, ocv1.ClusterExtensionRevisionTypeProgressing} { + cnd := *apimeta.FindStatusCondition(i.Conditions, cndType) + apimeta.SetStatusCondition(&ext.Status.Conditions, cnd) + } + ext.Status.Install = &ocv1.ClusterExtensionInstallStatus{ + Bundle: i.BundleMetadata, + } + ext.Status.ActiveRevisions = []ocv1.RevisionStatus{{Name: i.RevName}} + } + for idx, r := range revisionStates.RollingOut { + rs := ocv1.RevisionStatus{Name: r.RevName} + // Mirror Progressing condition from the latest active revision + if idx == len(revisionStates.RollingOut)-1 { + pcnd := apimeta.FindStatusCondition(r.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing) + if pcnd != nil { + apimeta.SetStatusCondition(&ext.Status.Conditions, *pcnd) + } + if acnd := apimeta.FindStatusCondition(r.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable); pcnd.Status == metav1.ConditionFalse && acnd != nil && acnd.Status != metav1.ConditionTrue { + apimeta.SetStatusCondition(&rs.Conditions, *acnd) + } + } + if len(ext.Status.ActiveRevisions) == 0 { + ext.Status.ActiveRevisions = []ocv1.RevisionStatus{rs} + } else { + ext.Status.ActiveRevisions = append(ext.Status.ActiveRevisions, rs) + } + } + return ctx, nil, nil + } +} diff --git a/internal/operator-controller/controllers/clusterextension_controller.go b/internal/operator-controller/controllers/clusterextension_controller.go index 03ff7c6e7..38978c8b9 100644 --- a/internal/operator-controller/controllers/clusterextension_controller.go +++ b/internal/operator-controller/controllers/clusterextension_controller.go @@ -17,12 +17,10 @@ limitations under the License. package controllers import ( - "cmp" "context" "errors" "fmt" "io/fs" - "slices" "strings" "github.com/go-logr/logr" @@ -49,8 +47,6 @@ import ( "github.com/operator-framework/operator-registry/alpha/declcfg" ocv1 "github.com/operator-framework/operator-controller/api/v1" - "github.com/operator-framework/operator-controller/internal/operator-controller/authentication" - "github.com/operator-framework/operator-controller/internal/operator-controller/bundleutil" "github.com/operator-framework/operator-controller/internal/operator-controller/conditionsets" "github.com/operator-framework/operator-controller/internal/operator-controller/labels" "github.com/operator-framework/operator-controller/internal/operator-controller/resolve" @@ -62,6 +58,39 @@ const ( ClusterExtensionCleanupContentManagerCacheFinalizer = "olm.operatorframework.io/cleanup-contentmanager-cache" ) +// ReconcileStepFunc represents a single step in the ClusterExtension reconciliation process. +// It takes a context and ClusterExtension object as input and returns: +// - A potentially modified context for the next step +// - An optional reconciliation result that if non-nil will stop reconciliation +// - Any error that occurred during reconciliation, which will be returned to the caller +type ReconcileStepFunc func(context.Context, *ocv1.ClusterExtension) (context.Context, *ctrl.Result, error) +type ReconcileSteps []ReconcileStepFunc + +// Reconcile executes a series of reconciliation steps in sequence for a ClusterExtension. +// It takes a context and ClusterExtension object as input and executes each step in the ReconcileSteps slice. +// If any step returns an error, reconciliation stops and the error is returned. +// If any step returns a non-nil ctrl.Result, reconciliation stops and that result is returned. +// If any step returns a nil context, reconciliation stops successfully. +// If all steps complete successfully, returns an empty ctrl.Result and nil error. +func (steps *ReconcileSteps) Reconcile(ctx context.Context, ext *ocv1.ClusterExtension) (ctrl.Result, error) { + var res *ctrl.Result + var err error + stepCtx := ctx + for _, step := range *steps { + stepCtx, res, err = step(stepCtx, ext) + if err != nil { + return ctrl.Result{}, err + } + if res != nil { + return *res, nil + } + if stepCtx == nil { + return ctrl.Result{}, nil + } + } + return ctrl.Result{}, nil +} + // ClusterExtensionReconciler reconciles a ClusterExtension object type ClusterExtensionReconciler struct { client.Client @@ -74,6 +103,7 @@ type ClusterExtensionReconciler struct { Applier Applier RevisionStatesGetter RevisionStatesGetter Finalizers crfinalizer.Finalizers + ReconcileSteps ReconcileSteps } type StorageMigrator interface { @@ -106,7 +136,7 @@ func (r *ClusterExtensionReconciler) Reconcile(ctx context.Context, req ctrl.Req defer l.Info("reconcile ending") reconciledExt := existingExt.DeepCopy() - res, reconcileErr := r.reconcile(ctx, reconciledExt) + res, reconcileErr := r.ReconcileSteps.Reconcile(ctx, reconciledExt) // Do checks before any Update()s, as Update() may modify the resource structure! updateStatus := !equality.Semantic.DeepEqual(existingExt.Status, reconciledExt.Status) @@ -164,179 +194,6 @@ func checkForUnexpectedClusterExtensionFieldChange(a, b ocv1.ClusterExtension) b return !equality.Semantic.DeepEqual(a, b) } -// Helper function to do the actual reconcile -// -// Today we always return ctrl.Result{} and an error. -// But in the future we might update this function -// to return different results (e.g. requeue). -// -/* The reconcile functions performs the following major tasks: -1. Resolution: Run the resolution to find the bundle from the catalog which needs to be installed. -2. Validate: Ensure that the bundle returned from the resolution for install meets our requirements. -3. Unpack: Unpack the contents from the bundle and store in a localdir in the pod. -4. Install: The process of installing involves: -4.1 Converting the CSV in the bundle into a set of plain k8s objects. -4.2 Generating a chart from k8s objects. -4.3 Store the release on cluster. -*/ -//nolint:unparam -func (r *ClusterExtensionReconciler) reconcile(ctx context.Context, ext *ocv1.ClusterExtension) (ctrl.Result, error) { - l := log.FromContext(ctx) - - l.Info("handling finalizers") - finalizeResult, err := r.Finalizers.Finalize(ctx, ext) - if err != nil { - setStatusProgressing(ext, err) - return ctrl.Result{}, err - } - if finalizeResult.Updated || finalizeResult.StatusUpdated { - // On create: make sure the finalizer is applied before we do anything - // On delete: make sure we do nothing after the finalizer is removed - return ctrl.Result{}, nil - } - - if ext.GetDeletionTimestamp() != nil { - // If we've gotten here, that means the cluster extension is being deleted, we've handled all of - // _our_ finalizers (above), but the cluster extension is still present in the cluster, likely - // because there are _other_ finalizers that other controllers need to handle, (e.g. the orphan - // deletion finalizer). - return ctrl.Result{}, nil - } - - objLbls := map[string]string{ - labels.OwnerKindKey: ocv1.ClusterExtensionKind, - labels.OwnerNameKey: ext.GetName(), - } - - if r.StorageMigrator != nil { - if err := r.StorageMigrator.Migrate(ctx, ext, objLbls); err != nil { - return ctrl.Result{}, fmt.Errorf("migrating storage: %w", err) - } - } - - l.Info("getting installed bundle") - revisionStates, err := r.RevisionStatesGetter.GetRevisionStates(ctx, ext) - if err != nil { - setInstallStatus(ext, nil) - var saerr *authentication.ServiceAccountNotFoundError - if errors.As(err, &saerr) { - setInstalledStatusConditionUnknown(ext, saerr.Error()) - setStatusProgressing(ext, errors.New("installation cannot proceed due to missing ServiceAccount")) - return ctrl.Result{}, err - } - setInstalledStatusConditionUnknown(ext, err.Error()) - setStatusProgressing(ext, errors.New("retrying to get installed bundle")) - return ctrl.Result{}, err - } - - var resolvedRevisionMetadata *RevisionMetadata - if len(revisionStates.RollingOut) == 0 { - l.Info("resolving bundle") - var bm *ocv1.BundleMetadata - if revisionStates.Installed != nil { - bm = &revisionStates.Installed.BundleMetadata - } - resolvedBundle, resolvedBundleVersion, resolvedDeprecation, err := r.Resolver.Resolve(ctx, ext, bm) - if err != nil { - // Note: We don't distinguish between resolution-specific errors and generic errors - setStatusProgressing(ext, err) - setInstalledStatusFromRevisionStates(ext, revisionStates) - ensureAllConditionsWithReason(ext, ocv1.ReasonFailed, err.Error()) - return ctrl.Result{}, err - } - - // set deprecation status after _successful_ resolution - // TODO: - // 1. It seems like deprecation status should reflect the currently installed bundle, not the resolved - // bundle. So perhaps we should set package and channel deprecations directly after resolution, but - // defer setting the bundle deprecation until we successfully install the bundle. - // 2. If resolution fails because it can't find a bundle, that doesn't mean we wouldn't be able to find - // a deprecation for the ClusterExtension's spec.packageName. Perhaps we should check for a non-nil - // resolvedDeprecation even if resolution returns an error. If present, we can still update some of - // our deprecation status. - // - Open question though: what if different catalogs have different opinions of what's deprecated. - // If we can't resolve a bundle, how do we know which catalog to trust for deprecation information? - // Perhaps if the package shows up in multiple catalogs and deprecations don't match, we can set - // the deprecation status to unknown? Or perhaps we somehow combine the deprecation information from - // all catalogs? - SetDeprecationStatus(ext, resolvedBundle.Name, resolvedDeprecation) - resolvedRevisionMetadata = &RevisionMetadata{ - Package: resolvedBundle.Package, - Image: resolvedBundle.Image, - BundleMetadata: bundleutil.MetadataFor(resolvedBundle.Name, *resolvedBundleVersion), - } - } else { - resolvedRevisionMetadata = revisionStates.RollingOut[0] - } - - l.Info("unpacking resolved bundle") - imageFS, _, _, err := r.ImagePuller.Pull(ctx, ext.GetName(), resolvedRevisionMetadata.Image, r.ImageCache) - if err != nil { - // Wrap the error passed to this with the resolution information until we have successfully - // installed since we intend for the progressing condition to replace the resolved condition - // and will be removing the .status.resolution field from the ClusterExtension status API - setStatusProgressing(ext, wrapErrorWithResolutionInfo(resolvedRevisionMetadata.BundleMetadata, err)) - setInstalledStatusFromRevisionStates(ext, revisionStates) - return ctrl.Result{}, err - } - - storeLbls := map[string]string{ - labels.BundleNameKey: resolvedRevisionMetadata.Name, - labels.PackageNameKey: resolvedRevisionMetadata.Package, - labels.BundleVersionKey: resolvedRevisionMetadata.Version, - labels.BundleReferenceKey: resolvedRevisionMetadata.Image, - } - - l.Info("applying bundle contents") - // NOTE: We need to be cautious of eating errors here. - // We should always return any error that occurs during an - // attempt to apply content to the cluster. Only when there is - // a verifiable reason to eat the error (i.e it is recoverable) - // should an exception be made. - // The following kinds of errors should be returned up the stack - // to ensure exponential backoff can occur: - // - Permission errors (it is not possible to watch changes to permissions. - // The only way to eventually recover from permission errors is to keep retrying). - if _, _, err := r.Applier.Apply(ctx, imageFS, ext, objLbls, storeLbls); err != nil { - // If there was an error applying the resolved bundle, - // report the error via the Progressing condition. - setStatusProgressing(ext, wrapErrorWithResolutionInfo(resolvedRevisionMetadata.BundleMetadata, err)) - return ctrl.Result{}, err - } - - // Mirror Available/Progressing conditions from the installed revision - if i := revisionStates.Installed; i != nil { - for _, cndType := range []string{ocv1.ClusterExtensionRevisionTypeAvailable, ocv1.ClusterExtensionRevisionTypeProgressing} { - cnd := *apimeta.FindStatusCondition(i.Conditions, cndType) - apimeta.SetStatusCondition(&ext.Status.Conditions, cnd) - } - ext.Status.Install = &ocv1.ClusterExtensionInstallStatus{ - Bundle: i.BundleMetadata, - } - ext.Status.ActiveRevisions = []ocv1.RevisionStatus{{Name: i.RevName}} - } - for idx, r := range revisionStates.RollingOut { - rs := ocv1.RevisionStatus{Name: r.RevName} - // Mirror Progressing condition from the latest active revision - if idx == len(revisionStates.RollingOut)-1 { - pcnd := apimeta.FindStatusCondition(r.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing) - if pcnd != nil { - apimeta.SetStatusCondition(&ext.Status.Conditions, *pcnd) - } - if acnd := apimeta.FindStatusCondition(r.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable); pcnd.Status == metav1.ConditionFalse && acnd != nil && acnd.Status != metav1.ConditionTrue { - apimeta.SetStatusCondition(&rs.Conditions, *acnd) - } - } - if len(ext.Status.ActiveRevisions) == 0 { - ext.Status.ActiveRevisions = []ocv1.RevisionStatus{rs} - } else { - ext.Status.ActiveRevisions = append(ext.Status.ActiveRevisions, rs) - } - } - - return ctrl.Result{}, nil -} - // SetDeprecationStatus will set the appropriate deprecation statuses for a ClusterExtension // based on the provided bundle func SetDeprecationStatus(ext *ocv1.ClusterExtension, bundleName string, deprecation *declcfg.Deprecation) { @@ -452,7 +309,6 @@ func (r *ClusterExtensionReconciler) SetupWithManager(mgr ctrl.Manager, opts ... for _, applyOpt := range opts { applyOpt(ctrlBuilder) } - return ctrlBuilder.Build(r) } @@ -533,55 +389,3 @@ func (d *HelmRevisionStatesGetter) GetRevisionStates(ctx context.Context, ext *o } return rs, nil } - -type BoxcutterRevisionStatesGetter struct { - Reader client.Reader -} - -func (d *BoxcutterRevisionStatesGetter) GetRevisionStates(ctx context.Context, ext *ocv1.ClusterExtension) (*RevisionStates, error) { - // TODO: boxcutter applier has a nearly identical bit of code for listing and sorting revisions - // only difference here is that it sorts in reverse order to start iterating with the most - // recent revisions. We should consolidate to avoid code duplication. - existingRevisionList := &ocv1.ClusterExtensionRevisionList{} - if err := d.Reader.List(ctx, existingRevisionList, client.MatchingLabels{ - ClusterExtensionRevisionOwnerLabel: ext.Name, - }); err != nil { - return nil, fmt.Errorf("listing revisions: %w", err) - } - slices.SortFunc(existingRevisionList.Items, func(a, b ocv1.ClusterExtensionRevision) int { - return cmp.Compare(a.Spec.Revision, b.Spec.Revision) - }) - - rs := &RevisionStates{} - for _, rev := range existingRevisionList.Items { - switch rev.Spec.LifecycleState { - case ocv1.ClusterExtensionRevisionLifecycleStateActive, - ocv1.ClusterExtensionRevisionLifecycleStatePaused: - default: - // Skip anything not active or paused, which should only be "Archived". - continue - } - - // TODO: the setting of these annotations (happens in boxcutter applier when we pass in "storageLabels") - // is fairly decoupled from this code where we get the annotations back out. We may want to co-locate - // the set/get logic a bit better to make it more maintainable and less likely to get out of sync. - rm := &RevisionMetadata{ - RevName: rev.Name, - Package: rev.Labels[labels.PackageNameKey], - Image: rev.Annotations[labels.BundleReferenceKey], - Conditions: rev.Status.Conditions, - BundleMetadata: ocv1.BundleMetadata{ - Name: rev.Annotations[labels.BundleNameKey], - Version: rev.Annotations[labels.BundleVersionKey], - }, - } - - if apimeta.IsStatusConditionTrue(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) { - rs.Installed = rm - } else { - rs.RollingOut = append(rs.RollingOut, rm) - } - } - - return rs, nil -} diff --git a/internal/operator-controller/controllers/clusterextension_controller_test.go b/internal/operator-controller/controllers/clusterextension_controller_test.go index 437f62dce..95920e542 100644 --- a/internal/operator-controller/controllers/clusterextension_controller_test.go +++ b/internal/operator-controller/controllers/clusterextension_controller_test.go @@ -49,15 +49,17 @@ func TestClusterExtensionDoesNotExist(t *testing.T) { } func TestClusterExtensionShortCircuitsReconcileDuringDeletion(t *testing.T) { - cl, reconciler := newClientAndReconciler(t) - installedBundleGetterCalledErr := errors.New("revision states getter called") + + cl, reconciler := newClientAndReconciler(t, func(reconciler *controllers.ClusterExtensionReconciler) { + reconciler.RevisionStatesGetter = &MockRevisionStatesGetter{ + Err: installedBundleGetterCalledErr, + } + }) + checkInstalledBundleGetterCalled := func(t require.TestingT, err error, args ...interface{}) { require.Equal(t, installedBundleGetterCalledErr, err) } - reconciler.RevisionStatesGetter = &MockRevisionStatesGetter{ - Err: installedBundleGetterCalledErr, - } type testCase struct { name string @@ -123,10 +125,12 @@ func TestClusterExtensionShortCircuitsReconcileDuringDeletion(t *testing.T) { func TestClusterExtensionResolutionFails(t *testing.T) { pkgName := fmt.Sprintf("non-existent-%s", rand.String(6)) - cl, reconciler := newClientAndReconciler(t) - reconciler.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bsemver.Version, *declcfg.Deprecation, error) { - return nil, nil, nil, fmt.Errorf("no package %q found", pkgName) + cl, reconciler := newClientAndReconciler(t, func(reconciler *controllers.ClusterExtensionReconciler) { + reconciler.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bsemver.Version, *declcfg.Deprecation, error) { + return nil, nil, nil, fmt.Errorf("no package %q found", pkgName) + }) }) + ctx := context.Background() extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} @@ -190,11 +194,6 @@ func TestClusterExtensionResolutionSuccessfulUnpackFails(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - cl, reconciler := newClientAndReconciler(t) - reconciler.ImagePuller = &imageutil.MockPuller{ - Error: tc.pullErr, - } - ctx := context.Background() extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} @@ -223,19 +222,30 @@ func TestClusterExtensionResolutionSuccessfulUnpackFails(t *testing.T) { }, }, } + cl, reconciler := newClientAndReconciler(t, + func(reconciler *controllers.ClusterExtensionReconciler) { + reconciler.ImagePuller = &imageutil.MockPuller{ + Error: tc.pullErr, + } + }, + func(reconciler *controllers.ClusterExtensionReconciler) { + reconciler.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bsemver.Version, *declcfg.Deprecation, error) { + v := bsemver.MustParse("1.0.0") + return &declcfg.Bundle{ + Name: "prometheus.v1.0.0", + Package: "prometheus", + Image: "quay.io/operatorhubio/prometheus@fake1.0.0", + }, &v, nil, nil + }) + }, + ) + err := cl.Create(ctx, clusterExtension) require.NoError(t, err) t.Log("It sets resolution success status") t.Log("By running reconcile") - reconciler.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bsemver.Version, *declcfg.Deprecation, error) { - v := bsemver.MustParse("1.0.0") - return &declcfg.Bundle{ - Name: "prometheus.v1.0.0", - Package: "prometheus", - Image: "quay.io/operatorhubio/prometheus@fake1.0.0", - }, &v, nil, nil - }) + res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) require.Equal(t, ctrl.Result{}, res) require.Error(t, err) @@ -270,10 +280,23 @@ func TestClusterExtensionResolutionSuccessfulUnpackFails(t *testing.T) { } func TestClusterExtensionResolutionAndUnpackSuccessfulApplierFails(t *testing.T) { - cl, reconciler := newClientAndReconciler(t) - reconciler.ImagePuller = &imageutil.MockPuller{ - ImageFS: fstest.MapFS{}, - } + cl, reconciler := newClientAndReconciler(t, + func(reconciler *controllers.ClusterExtensionReconciler) { + reconciler.ImagePuller = &imageutil.MockPuller{ + ImageFS: fstest.MapFS{}, + } + reconciler.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bsemver.Version, *declcfg.Deprecation, error) { + v := bsemver.MustParse("1.0.0") + return &declcfg.Bundle{ + Name: "prometheus.v1.0.0", + Package: "prometheus", + Image: "quay.io/operatorhubio/prometheus@fake1.0.0", + }, &v, nil, nil + }) + reconciler.Applier = &MockApplier{ + err: errors.New("apply failure"), + } + }) ctx := context.Background() extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} @@ -308,17 +331,7 @@ func TestClusterExtensionResolutionAndUnpackSuccessfulApplierFails(t *testing.T) t.Log("It sets resolution success status") t.Log("By running reconcile") - reconciler.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bsemver.Version, *declcfg.Deprecation, error) { - v := bsemver.MustParse("1.0.0") - return &declcfg.Bundle{ - Name: "prometheus.v1.0.0", - Package: "prometheus", - Image: "quay.io/operatorhubio/prometheus@fake1.0.0", - }, &v, nil, nil - }) - reconciler.Applier = &MockApplier{ - err: errors.New("apply failure"), - } + res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) require.Equal(t, ctrl.Result{}, res) require.Error(t, err) @@ -347,12 +360,13 @@ func TestClusterExtensionResolutionAndUnpackSuccessfulApplierFails(t *testing.T) } func TestClusterExtensionServiceAccountNotFound(t *testing.T) { - cl, reconciler := newClientAndReconciler(t) - reconciler.RevisionStatesGetter = &MockRevisionStatesGetter{ - Err: &authentication.ServiceAccountNotFoundError{ - ServiceAccountName: "missing-sa", - ServiceAccountNamespace: "default", - }} + cl, reconciler := newClientAndReconciler(t, func(reconciler *controllers.ClusterExtensionReconciler) { + reconciler.RevisionStatesGetter = &MockRevisionStatesGetter{ + Err: &authentication.ServiceAccountNotFoundError{ + ServiceAccountName: "missing-sa", + ServiceAccountNamespace: "default", + }} + }) ctx := context.Background() extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} @@ -401,10 +415,32 @@ func TestClusterExtensionServiceAccountNotFound(t *testing.T) { } func TestClusterExtensionApplierFailsWithBundleInstalled(t *testing.T) { - cl, reconciler := newClientAndReconciler(t) - reconciler.ImagePuller = &imageutil.MockPuller{ - ImageFS: fstest.MapFS{}, + mockApplier := &MockApplier{ + installCompleted: true, } + cl, reconciler := newClientAndReconciler(t, func(reconciler *controllers.ClusterExtensionReconciler) { + reconciler.ImagePuller = &imageutil.MockPuller{ + ImageFS: fstest.MapFS{}, + } + reconciler.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bsemver.Version, *declcfg.Deprecation, error) { + v := bsemver.MustParse("1.0.0") + return &declcfg.Bundle{ + Name: "prometheus.v1.0.0", + Package: "prometheus", + Image: "quay.io/operatorhubio/prometheus@fake1.0.0", + }, &v, nil, nil + }) + + reconciler.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + Installed: &controllers.RevisionMetadata{ + BundleMetadata: ocv1.BundleMetadata{Name: "prometheus.v1.0.0", Version: "1.0.0"}, + Image: "quay.io/operatorhubio/prometheus@fake1.0.0", + }, + }, + } + reconciler.Applier = mockApplier + }) ctx := context.Background() extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} @@ -439,34 +475,13 @@ func TestClusterExtensionApplierFailsWithBundleInstalled(t *testing.T) { t.Log("It sets resolution success status") t.Log("By running reconcile") - reconciler.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bsemver.Version, *declcfg.Deprecation, error) { - v := bsemver.MustParse("1.0.0") - return &declcfg.Bundle{ - Name: "prometheus.v1.0.0", - Package: "prometheus", - Image: "quay.io/operatorhubio/prometheus@fake1.0.0", - }, &v, nil, nil - }) - - reconciler.RevisionStatesGetter = &MockRevisionStatesGetter{ - RevisionStates: &controllers.RevisionStates{ - Installed: &controllers.RevisionMetadata{ - BundleMetadata: ocv1.BundleMetadata{Name: "prometheus.v1.0.0", Version: "1.0.0"}, - Image: "quay.io/operatorhubio/prometheus@fake1.0.0", - }, - }, - } - reconciler.Applier = &MockApplier{ - installCompleted: true, - } res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) require.Equal(t, ctrl.Result{}, res) require.NoError(t, err) - reconciler.Applier = &MockApplier{ - err: errors.New("apply failure"), - } + mockApplier.installCompleted = false + mockApplier.err = errors.New("apply failure") res, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) require.Equal(t, ctrl.Result{}, res) @@ -496,10 +511,23 @@ func TestClusterExtensionApplierFailsWithBundleInstalled(t *testing.T) { } func TestClusterExtensionManagerFailed(t *testing.T) { - cl, reconciler := newClientAndReconciler(t) - reconciler.ImagePuller = &imageutil.MockPuller{ - ImageFS: fstest.MapFS{}, - } + cl, reconciler := newClientAndReconciler(t, func(reconciler *controllers.ClusterExtensionReconciler) { + reconciler.ImagePuller = &imageutil.MockPuller{ + ImageFS: fstest.MapFS{}, + } + reconciler.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bsemver.Version, *declcfg.Deprecation, error) { + v := bsemver.MustParse("1.0.0") + return &declcfg.Bundle{ + Name: "prometheus.v1.0.0", + Package: "prometheus", + Image: "quay.io/operatorhubio/prometheus@fake1.0.0", + }, &v, nil, nil + }) + reconciler.Applier = &MockApplier{ + installCompleted: true, + err: errors.New("manager fail"), + } + }) ctx := context.Background() extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} @@ -534,18 +562,6 @@ func TestClusterExtensionManagerFailed(t *testing.T) { t.Log("It sets resolution success status") t.Log("By running reconcile") - reconciler.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bsemver.Version, *declcfg.Deprecation, error) { - v := bsemver.MustParse("1.0.0") - return &declcfg.Bundle{ - Name: "prometheus.v1.0.0", - Package: "prometheus", - Image: "quay.io/operatorhubio/prometheus@fake1.0.0", - }, &v, nil, nil - }) - reconciler.Applier = &MockApplier{ - installCompleted: true, - err: errors.New("manager fail"), - } res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) require.Equal(t, ctrl.Result{}, res) require.Error(t, err) @@ -572,10 +588,23 @@ func TestClusterExtensionManagerFailed(t *testing.T) { } func TestClusterExtensionManagedContentCacheWatchFail(t *testing.T) { - cl, reconciler := newClientAndReconciler(t) - reconciler.ImagePuller = &imageutil.MockPuller{ - ImageFS: fstest.MapFS{}, - } + cl, reconciler := newClientAndReconciler(t, func(reconciler *controllers.ClusterExtensionReconciler) { + reconciler.ImagePuller = &imageutil.MockPuller{ + ImageFS: fstest.MapFS{}, + } + reconciler.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bsemver.Version, *declcfg.Deprecation, error) { + v := bsemver.MustParse("1.0.0") + return &declcfg.Bundle{ + Name: "prometheus.v1.0.0", + Package: "prometheus", + Image: "quay.io/operatorhubio/prometheus@fake1.0.0", + }, &v, nil, nil + }) + reconciler.Applier = &MockApplier{ + installCompleted: true, + err: errors.New("watch error"), + } + }) ctx := context.Background() extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} @@ -611,18 +640,7 @@ func TestClusterExtensionManagedContentCacheWatchFail(t *testing.T) { t.Log("It sets resolution success status") t.Log("By running reconcile") - reconciler.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bsemver.Version, *declcfg.Deprecation, error) { - v := bsemver.MustParse("1.0.0") - return &declcfg.Bundle{ - Name: "prometheus.v1.0.0", - Package: "prometheus", - Image: "quay.io/operatorhubio/prometheus@fake1.0.0", - }, &v, nil, nil - }) - reconciler.Applier = &MockApplier{ - installCompleted: true, - err: errors.New("watch error"), - } + res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) require.Equal(t, ctrl.Result{}, res) require.Error(t, err) @@ -649,10 +667,22 @@ func TestClusterExtensionManagedContentCacheWatchFail(t *testing.T) { } func TestClusterExtensionInstallationSucceeds(t *testing.T) { - cl, reconciler := newClientAndReconciler(t) - reconciler.ImagePuller = &imageutil.MockPuller{ - ImageFS: fstest.MapFS{}, - } + cl, reconciler := newClientAndReconciler(t, func(reconciler *controllers.ClusterExtensionReconciler) { + reconciler.ImagePuller = &imageutil.MockPuller{ + ImageFS: fstest.MapFS{}, + } + reconciler.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bsemver.Version, *declcfg.Deprecation, error) { + v := bsemver.MustParse("1.0.0") + return &declcfg.Bundle{ + Name: "prometheus.v1.0.0", + Package: "prometheus", + Image: "quay.io/operatorhubio/prometheus@fake1.0.0", + }, &v, nil, nil + }) + reconciler.Applier = &MockApplier{ + installCompleted: true, + } + }) ctx := context.Background() extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} @@ -687,17 +717,6 @@ func TestClusterExtensionInstallationSucceeds(t *testing.T) { t.Log("It sets resolution success status") t.Log("By running reconcile") - reconciler.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bsemver.Version, *declcfg.Deprecation, error) { - v := bsemver.MustParse("1.0.0") - return &declcfg.Bundle{ - Name: "prometheus.v1.0.0", - Package: "prometheus", - Image: "quay.io/operatorhubio/prometheus@fake1.0.0", - }, &v, nil, nil - }) - reconciler.Applier = &MockApplier{ - installCompleted: true, - } res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) require.Equal(t, ctrl.Result{}, res) require.NoError(t, err) @@ -724,10 +743,32 @@ func TestClusterExtensionInstallationSucceeds(t *testing.T) { } func TestClusterExtensionDeleteFinalizerFails(t *testing.T) { - cl, reconciler := newClientAndReconciler(t) - reconciler.ImagePuller = &imageutil.MockPuller{ - ImageFS: fstest.MapFS{}, - } + fakeFinalizer := "fake.testfinalizer.io" + finalizersMessage := "still have finalizers" + cl, reconciler := newClientAndReconciler(t, func(reconciler *controllers.ClusterExtensionReconciler) { + reconciler.ImagePuller = &imageutil.MockPuller{ + ImageFS: fstest.MapFS{}, + } + reconciler.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bsemver.Version, *declcfg.Deprecation, error) { + v := bsemver.MustParse("1.0.0") + return &declcfg.Bundle{ + Name: "prometheus.v1.0.0", + Package: "prometheus", + Image: "quay.io/operatorhubio/prometheus@fake1.0.0", + }, &v, nil, nil + }) + reconciler.Applier = &MockApplier{ + installCompleted: true, + } + reconciler.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + Installed: &controllers.RevisionMetadata{ + BundleMetadata: ocv1.BundleMetadata{Name: "prometheus.v1.0.0", Version: "1.0.0"}, + Image: "quay.io/operatorhubio/prometheus@fake1.0.0", + }, + }, + } + }) ctx := context.Background() extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} @@ -761,27 +802,6 @@ func TestClusterExtensionDeleteFinalizerFails(t *testing.T) { require.NoError(t, err) t.Log("It sets resolution success status") t.Log("By running reconcile") - reconciler.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bsemver.Version, *declcfg.Deprecation, error) { - v := bsemver.MustParse("1.0.0") - return &declcfg.Bundle{ - Name: "prometheus.v1.0.0", - Package: "prometheus", - Image: "quay.io/operatorhubio/prometheus@fake1.0.0", - }, &v, nil, nil - }) - fakeFinalizer := "fake.testfinalizer.io" - finalizersMessage := "still have finalizers" - reconciler.Applier = &MockApplier{ - installCompleted: true, - } - reconciler.RevisionStatesGetter = &MockRevisionStatesGetter{ - RevisionStates: &controllers.RevisionStates{ - Installed: &controllers.RevisionMetadata{ - BundleMetadata: ocv1.BundleMetadata{Name: "prometheus.v1.0.0", Version: "1.0.0"}, - Image: "quay.io/operatorhubio/prometheus@fake1.0.0", - }, - }, - } err = reconciler.Finalizers.Register(fakeFinalizer, finalizers.FinalizerFunc(func(ctx context.Context, obj client.Object) (crfinalizer.Result, error) { return crfinalizer.Result{}, errors.New(finalizersMessage) })) diff --git a/internal/operator-controller/controllers/clusterextension_reconcile_steps.go b/internal/operator-controller/controllers/clusterextension_reconcile_steps.go new file mode 100644 index 000000000..b8cfd0c8a --- /dev/null +++ b/internal/operator-controller/controllers/clusterextension_reconcile_steps.go @@ -0,0 +1,211 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + "errors" + "io/fs" + + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/finalizer" + "sigs.k8s.io/controller-runtime/pkg/log" + + ocv1 "github.com/operator-framework/operator-controller/api/v1" + "github.com/operator-framework/operator-controller/internal/operator-controller/authentication" + "github.com/operator-framework/operator-controller/internal/operator-controller/bundleutil" + "github.com/operator-framework/operator-controller/internal/operator-controller/labels" + "github.com/operator-framework/operator-controller/internal/operator-controller/resolve" + imageutil "github.com/operator-framework/operator-controller/internal/shared/util/image" +) + +type revisionStatesKey struct{} +type resolvedRevisionMetadataKey struct{} +type imageFSKey struct{} + +func HandleFinalizers(f finalizer.Finalizer) ReconcileStepFunc { + return func(ctx context.Context, ext *ocv1.ClusterExtension) (context.Context, *ctrl.Result, error) { + l := log.FromContext(ctx) + + l.Info("handling finalizers") + finalizeResult, err := f.Finalize(ctx, ext) + if err != nil { + setStatusProgressing(ext, err) + return ctx, nil, err + } + if finalizeResult.Updated || finalizeResult.StatusUpdated { + // On create: make sure the finalizer is applied before we do anything + // On delete: make sure we do nothing after the finalizer is removed + return ctx, &ctrl.Result{}, nil + } + + if ext.GetDeletionTimestamp() != nil { + // If we've gotten here, that means the cluster extension is being deleted, we've handled all of + // _our_ finalizers (above), but the cluster extension is still present in the cluster, likely + // because there are _other_ finalizers that other controllers need to handle, (e.g. the orphan + // deletion finalizer). + return nil, nil, nil + } + return ctx, nil, nil + } +} + +func RetrieveRevisionStates(r RevisionStatesGetter) ReconcileStepFunc { + return func(ctx context.Context, ext *ocv1.ClusterExtension) (context.Context, *ctrl.Result, error) { + l := log.FromContext(ctx) + l.Info("getting installed bundle") + revisionStates, err := r.GetRevisionStates(ctx, ext) + if err != nil { + setInstallStatus(ext, nil) + var saerr *authentication.ServiceAccountNotFoundError + if errors.As(err, &saerr) { + setInstalledStatusConditionUnknown(ext, saerr.Error()) + setStatusProgressing(ext, errors.New("installation cannot proceed due to missing ServiceAccount")) + return ctx, nil, err + } + setInstalledStatusConditionUnknown(ext, err.Error()) + setStatusProgressing(ext, errors.New("retrying to get installed bundle")) + return ctx, nil, err + } + return context.WithValue(ctx, revisionStatesKey{}, revisionStates), nil, nil + } +} + +func RetrieveRevisionMetadata(r resolve.Resolver) ReconcileStepFunc { + return func(ctx context.Context, ext *ocv1.ClusterExtension) (context.Context, *ctrl.Result, error) { + l := log.FromContext(ctx) + revisionStates := ctx.Value(revisionStatesKey{}).(*RevisionStates) + var resolvedRevisionMetadata *RevisionMetadata + if len(revisionStates.RollingOut) == 0 { + l.Info("resolving bundle") + var bm *ocv1.BundleMetadata + if revisionStates.Installed != nil { + bm = &revisionStates.Installed.BundleMetadata + } + resolvedBundle, resolvedBundleVersion, resolvedDeprecation, err := r.Resolve(ctx, ext, bm) + if err != nil { + // Note: We don't distinguish between resolution-specific errors and generic errors + setStatusProgressing(ext, err) + setInstalledStatusFromRevisionStates(ext, revisionStates) + ensureAllConditionsWithReason(ext, ocv1.ReasonFailed, err.Error()) + return ctx, nil, err + } + + // set deprecation status after _successful_ resolution + // TODO: + // 1. It seems like deprecation status should reflect the currently installed bundle, not the resolved + // bundle. So perhaps we should set package and channel deprecations directly after resolution, but + // defer setting the bundle deprecation until we successfully install the bundle. + // 2. If resolution fails because it can't find a bundle, that doesn't mean we wouldn't be able to find + // a deprecation for the ClusterExtension's spec.packageName. Perhaps we should check for a non-nil + // resolvedDeprecation even if resolution returns an error. If present, we can still update some of + // our deprecation status. + // - Open question though: what if different catalogs have different opinions of what's deprecated. + // If we can't resolve a bundle, how do we know which catalog to trust for deprecation information? + // Perhaps if the package shows up in multiple catalogs and deprecations don't match, we can set + // the deprecation status to unknown? Or perhaps we somehow combine the deprecation information from + // all catalogs? + SetDeprecationStatus(ext, resolvedBundle.Name, resolvedDeprecation) + resolvedRevisionMetadata = &RevisionMetadata{ + Package: resolvedBundle.Package, + Image: resolvedBundle.Image, + BundleMetadata: bundleutil.MetadataFor(resolvedBundle.Name, *resolvedBundleVersion), + } + } else { + resolvedRevisionMetadata = revisionStates.RollingOut[0] + } + return context.WithValue(ctx, resolvedRevisionMetadataKey{}, resolvedRevisionMetadata), nil, nil + } +} + +func UnpackBundle(i imageutil.Puller, cache imageutil.Cache) ReconcileStepFunc { + return func(ctx context.Context, ext *ocv1.ClusterExtension) (context.Context, *ctrl.Result, error) { + l := log.FromContext(ctx) + l.Info("unpacking resolved bundle") + resolvedRevisionMetadata := ctx.Value(resolvedRevisionMetadataKey{}).(*RevisionMetadata) + imageFS, _, _, err := i.Pull(ctx, ext.GetName(), resolvedRevisionMetadata.Image, cache) + if err != nil { + revisionStates := ctx.Value(revisionStatesKey{}).(*RevisionStates) + // Wrap the error passed to this with the resolution information until we have successfully + // installed since we intend for the progressing condition to replace the resolved condition + // and will be removing the .status.resolution field from the ClusterExtension status API + setStatusProgressing(ext, wrapErrorWithResolutionInfo(resolvedRevisionMetadata.BundleMetadata, err)) + setInstalledStatusFromRevisionStates(ext, revisionStates) + return ctx, nil, err + } + return context.WithValue(ctx, imageFSKey{}, imageFS), nil, nil + } +} + +func ApplyBundle(a Applier) ReconcileStepFunc { + return func(ctx context.Context, ext *ocv1.ClusterExtension) (context.Context, *ctrl.Result, error) { + l := log.FromContext(ctx) + resolvedRevisionMetadata := ctx.Value(resolvedRevisionMetadataKey{}).(*RevisionMetadata) + imageFS := ctx.Value(imageFSKey{}).(fs.FS) + revisionStates := ctx.Value(revisionStatesKey{}).(*RevisionStates) + storeLbls := map[string]string{ + labels.BundleNameKey: resolvedRevisionMetadata.Name, + labels.PackageNameKey: resolvedRevisionMetadata.Package, + labels.BundleVersionKey: resolvedRevisionMetadata.Version, + labels.BundleReferenceKey: resolvedRevisionMetadata.Image, + } + objLbls := map[string]string{ + labels.OwnerKindKey: ocv1.ClusterExtensionKind, + labels.OwnerNameKey: ext.GetName(), + } + + l.Info("applying bundle contents") + // NOTE: We need to be cautious of eating errors here. + // We should always return any error that occurs during an + // attempt to apply content to the cluster. Only when there is + // a verifiable reason to eat the error (i.e it is recoverable) + // should an exception be made. + // The following kinds of errors should be returned up the stack + // to ensure exponential backoff can occur: + // - Permission errors (it is not possible to watch changes to permissions. + // The only way to eventually recover from permission errors is to keep retrying). + rolloutSucceeded, rolloutStatus, err := a.Apply(ctx, imageFS, ext, objLbls, storeLbls) + + // Set installed status + if rolloutSucceeded { + revisionStates = &RevisionStates{Installed: resolvedRevisionMetadata} + } else if err == nil && revisionStates.Installed == nil && len(revisionStates.RollingOut) == 0 { + revisionStates = &RevisionStates{RollingOut: []*RevisionMetadata{resolvedRevisionMetadata}} + } + setInstalledStatusFromRevisionStates(ext, revisionStates) + + // If there was an error applying the resolved bundle, + // report the error via the Progressing condition. + if err != nil { + setStatusProgressing(ext, wrapErrorWithResolutionInfo(resolvedRevisionMetadata.BundleMetadata, err)) + return ctx, nil, err + } else if !rolloutSucceeded { + apimeta.SetStatusCondition(&ext.Status.Conditions, metav1.Condition{ + Type: ocv1.TypeProgressing, + Status: metav1.ConditionTrue, + Reason: ocv1.ReasonRolloutInProgress, + Message: rolloutStatus, + ObservedGeneration: ext.GetGeneration(), + }) + } else { + setStatusProgressing(ext, nil) + } + return ctx, nil, nil + } +} diff --git a/internal/operator-controller/controllers/suite_test.go b/internal/operator-controller/controllers/suite_test.go index 02d538237..280002ef7 100644 --- a/internal/operator-controller/controllers/suite_test.go +++ b/internal/operator-controller/controllers/suite_test.go @@ -76,7 +76,9 @@ func (m *MockApplier) Apply(_ context.Context, _ fs.FS, _ *ocv1.ClusterExtension return m.installCompleted, m.installStatus, m.err } -func newClientAndReconciler(t *testing.T) (client.Client, *controllers.ClusterExtensionReconciler) { +type reconcilerOption func(*controllers.ClusterExtensionReconciler) + +func newClientAndReconciler(t *testing.T, opts ...reconcilerOption) (client.Client, *controllers.ClusterExtensionReconciler) { cl := newClient(t) reconciler := &controllers.ClusterExtensionReconciler{ @@ -86,6 +88,20 @@ func newClientAndReconciler(t *testing.T) (client.Client, *controllers.ClusterEx }, Finalizers: crfinalizer.NewFinalizers(), } + for _, opt := range opts { + opt(reconciler) + } + reconciler.ReconcileSteps = []controllers.ReconcileStepFunc{controllers.HandleFinalizers(reconciler.Finalizers), controllers.RetrieveRevisionStates(reconciler.RevisionStatesGetter)} + if r := reconciler.Resolver; r != nil { + reconciler.ReconcileSteps = append(reconciler.ReconcileSteps, controllers.RetrieveRevisionMetadata(r)) + } + if i := reconciler.ImagePuller; i != nil { + reconciler.ReconcileSteps = append(reconciler.ReconcileSteps, controllers.UnpackBundle(i, reconciler.ImageCache)) + } + if a := reconciler.Applier; a != nil { + reconciler.ReconcileSteps = append(reconciler.ReconcileSteps, controllers.ApplyBundle(a)) + } + return cl, reconciler }