diff --git a/api/v1beta1/clusterpromotion_types.go b/api/v1beta1/clusterpromotion_types.go index d526dc7b..a3a87718 100644 --- a/api/v1beta1/clusterpromotion_types.go +++ b/api/v1beta1/clusterpromotion_types.go @@ -240,6 +240,23 @@ type TimeWindow struct { // ManualTrigger is a placeholder to represent a manual trigger. type ManualTrigger struct { + // Delay is an optional time duration to wait after the WaitForStatus condition + // is met before proceeding with the promotion. + // +optional + Delay *metav1.Duration `json:"delay,omitempty"` + + // PreHealthCheckDeployment is a slice of resources Sveltos will deploy after the Delay + // period has elapsed and before running PostDelayHealthChecks. + // This can be used, for example, to deploy a Job that performs validation tasks. + // The PostDelayHealthChecks can then validate the successful completion of these resources (e.g., a Job). + // +optional + PreHealthCheckDeployment []PolicyRef `json:"preHealthCheckDeployment,omitempty"` + + // PostDelayHealthChecks is a slice of health checks Sveltos will run after the delay + // period has elapsed. + // +optional + PostDelayHealthChecks []libsveltosv1beta1.ValidateHealth `json:"postDelayHealthChecks,omitempty"` + // Approved, when set to true, signals to the controller that // promotion to the next stage is approved. // +optional diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index f8d93c48..3504036e 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -943,6 +943,23 @@ func (in *KustomizationRef) DeepCopy() *KustomizationRef { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *ManualTrigger) DeepCopyInto(out *ManualTrigger) { *out = *in + if in.Delay != nil { + in, out := &in.Delay, &out.Delay + *out = new(v1.Duration) + **out = **in + } + if in.PreHealthCheckDeployment != nil { + in, out := &in.PreHealthCheckDeployment, &out.PreHealthCheckDeployment + *out = make([]PolicyRef, len(*in)) + copy(*out, *in) + } + if in.PostDelayHealthChecks != nil { + in, out := &in.PostDelayHealthChecks, &out.PostDelayHealthChecks + *out = make([]apiv1beta1.ValidateHealth, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.Approved != nil { in, out := &in.Approved, &out.Approved *out = new(bool) diff --git a/config/crd/bases/config.projectsveltos.io_clusterpromotions.yaml b/config/crd/bases/config.projectsveltos.io_clusterpromotions.yaml index f68a8aeb..11c93394 100644 --- a/config/crd/bases/config.projectsveltos.io_clusterpromotions.yaml +++ b/config/crd/bases/config.projectsveltos.io_clusterpromotions.yaml @@ -1675,6 +1675,193 @@ spec: to the next stage. This prevents unintended immediate advancement past the next manual stage. Defaults to true. type: boolean + delay: + description: |- + Delay is an optional time duration to wait after the WaitForStatus condition + is met before proceeding with the promotion. + type: string + postDelayHealthChecks: + description: |- + PostDelayHealthChecks is a slice of health checks Sveltos will run after the delay + period has elapsed. + items: + properties: + evaluateCEL: + description: |- + EvaluateCEL contains a list of named CEL (Common Expression Language) rules. + Each rule will be evaluated in order against each object selected based on + the criteria defined above. Each rule's expression must return a boolean value + indicating whether the object is a match. + + Evaluation stops at the first rule that returns true; subsequent + rules will not be evaluated. + items: + description: CELRule defines a named CEL rule + used in EvaluateCEL. 
+ properties: + name: + description: Name is a human-readable identifier + for the rule. + type: string + rule: + description: |- + Rule is the CEL (Common Expression Language) expression to evaluate. + It must return a bool + type: string + required: + - name + - rule + type: object + type: array + featureID: + description: |- + FeatureID is an indentifier of the feature (Helm/Kustomize/Resources) + This field indicates when to run this check. + For instance: + - if set to Helm this check will be run after all helm + charts specified in the ClusterProfile are deployed. + - if set to Resources this check will be run after the content + of all the ConfigMaps/Secrets referenced by ClusterProfile in the + PolicyRef sections is deployed + enum: + - Resources + - Helm + - Kustomize + type: string + group: + description: Group of the resource to fetch in + the managed Cluster. + type: string + kind: + description: Kind of the resource to fetch in + the managed Cluster. + minLength: 1 + type: string + labelFilters: + description: LabelFilters allows to filter resources + based on current labels. + items: + properties: + key: + description: Key is the label key + type: string + operation: + description: Operation is the comparison + operation + enum: + - Equal + - Different + - Has + - DoesNotHave + type: string + value: + description: Value is the label value + type: string + required: + - key + - operation + type: object + type: array + name: + description: Name is the name of this check + type: string + namespace: + description: |- + Namespace of the resource to fetch in the managed Cluster. + Empty for resources scoped at cluster level. + type: string + script: + description: |- + Script is a text containing a lua script. + Must return struct with field "health" + representing whether object is a match (true or false) + type: string + version: + description: Version of the resource to fetch + in the managed Cluster. 
+ type: string + required: + - featureID + - group + - kind + - name + - version + type: object + type: array + preHealthCheckDeployment: + description: |- + PreHealthCheckDeployment is a slice of resources Sveltos will deploy after the Delay + period has elapsed and before running PostDelayHealthChecks. + This can be used, for example, to deploy a Job that performs validation tasks. + The PostDelayHealthChecks can then validate the successful completion of these resources (e.g., a Job). + items: + properties: + deploymentType: + default: Remote + description: |- + DeploymentType indicates whether resources need to be deployed + into the management cluster (local) or the managed cluster (remote) + enum: + - Local + - Remote + type: string + kind: + description: |- + Kind of the resource. Supported kinds are: + - ConfigMap/Secret + - flux GitRepository;OCIRepository;Bucket + enum: + - GitRepository + - OCIRepository + - Bucket + - ConfigMap + - Secret + type: string + name: + description: |- + Name of the referenced resource. + Name can be expressed as a template and instantiate using any cluster field. + minLength: 1 + type: string + namespace: + description: |- + Namespace of the referenced resource. + For ClusterProfile namespace can be left empty. In such a case, namespace will + be implicit set to cluster's namespace. + For Profile namespace must be left empty. Profile namespace will be used. + Namespace can be expressed as a template and instantiate using any cluster field. + type: string + optional: + default: false + description: |- + Optional indicates that the referenced resource is not mandatory. + If set to true and the resource is not found, the error will be ignored, + and Sveltos will continue processing other PolicyRefs. + type: boolean + path: + description: |- + Path to the directory containing the YAML files. + Defaults to 'None', which translates to the root path of the SourceRef. 
+ Used only for GitRepository;OCIRepository;Bucket + type: string + tier: + default: 100 + description: |- + Tier controls the order of deployment for resources coming from different PolicyRefs + within the same ClusterProfile or Profile. + When two PolicyRefs attempt to deploy the same resource, the PolicyRef with the lowest + Tier value takes priority and deploys/updates the resource. + This priority mechanism is only checked after the parent ClusterProfile has won + the primary conflict resolution against other ClusterProfiles. + Higher Tier values represent lower priority. The default Tier value is 100. + format: int32 + minimum: 1 + type: integer + required: + - kind + - name + type: object + type: array type: object type: object x-kubernetes-validations: diff --git a/controllers/clusterpromotion_controller.go b/controllers/clusterpromotion_controller.go index df3fa34c..241e7920 100644 --- a/controllers/clusterpromotion_controller.go +++ b/controllers/clusterpromotion_controller.go @@ -825,7 +825,7 @@ func (r *ClusterPromotionReconciler) doMoveToNextStage(ctx context.Context, if currentStageSpec.Trigger.Manual != nil { logger.V(logs.LogDebug).Info("trigger is manual") - return r.canManualAdvance(clusterPromotion, currentStageName, currentStageSpec.Trigger.Manual, logger), nil + return r.canManualAdvance(ctx, clusterPromotion, currentStageName, currentStageSpec.Trigger.Manual, logger) } return true, nil @@ -837,29 +837,21 @@ func (r *ClusterPromotionReconciler) canAutoAdvance(ctx context.Context, autoTrigger *configv1beta1.AutoTrigger, logger logr.Logger) (bool, error) { if autoTrigger.Delay != nil { - currentStageStatus := getStageStatusByName(clusterPromotion, currentStageName) - if currentStageStatus == nil { - errorMsg := fmt.Sprintf("status not present for stage %s", currentStageName) - logger.V(logs.LogDebug).Info(errorMsg) - return false, errors.New(errorMsg) + requiredReadyTime, err := r.getRequiredReadyTime(clusterPromotion, currentStageName, + 
autoTrigger.Delay) + if err != nil { + logger.V(logs.LogDebug).Info(err.Error()) + return false, err } - now := time.Now() - - // Convert metav1.Duration (Delay) to time.Duration - delayDuration := autoTrigger.Delay.Duration - - // Calculate the required wait time (Success Time + Delay Duration) - requiredReadyTime := currentStageStatus.LastSuccessfulAppliedTime.Add(delayDuration) - // 2. Check the delay condition - if now.Before(requiredReadyTime) { + if time.Now().Before(*requiredReadyTime) { // The required delay time has NOT yet passed. message := fmt.Sprintf("Delayed: Waiting for Time Window: %s", requiredReadyTime.Format(time.RFC3339)) logger.V(logs.LogDebug).Info(message, "stage", currentStageName, - "delay", delayDuration.String(), + "delay", autoTrigger.Delay.Duration.String(), "ready_at", requiredReadyTime.Format(time.RFC3339), ) @@ -870,7 +862,8 @@ func (r *ClusterPromotionReconciler) canAutoAdvance(ctx context.Context, // --- DELAY HAS PASSED: Deploy PreHealthCheckDeployment -- stage := getStageSpecByName(clusterPromotion, currentStageName) - if err := r.reconcilePreHealthCheckDeployment(ctx, clusterPromotion, stage, logger); err != nil { + if err := r.reconcilePreHealthCheckDeployment(ctx, clusterPromotion, stage, + autoTrigger.PreHealthCheckDeployment, logger); err != nil { return false, err } @@ -1028,11 +1021,9 @@ func (r *ClusterPromotionReconciler) deleteCheckDeploymentClusterProfile(ctx con // for the given stage func (r *ClusterPromotionReconciler) reconcilePreHealthCheckDeployment(ctx context.Context, clusterPromotion *configv1beta1.ClusterPromotion, stage *configv1beta1.Stage, - logger logr.Logger) error { - - if stage.Trigger == nil || stage.Trigger.Auto == nil || - len(stage.Trigger.Auto.PreHealthCheckDeployment) == 0 { + preHealthCheckDeployment []configv1beta1.PolicyRef, logger logr.Logger) error { + if stage.Trigger == nil || len(preHealthCheckDeployment) == 0 { return nil } @@ -1066,7 +1057,7 @@ func (r *ClusterPromotionReconciler) 
reconcilePreHealthCheckDeployment(ctx conte ClusterSelector: stage.ClusterSelector, // 3. Set PreHealthCheckDeployment - PolicyRefs: stage.Trigger.Auto.PreHealthCheckDeployment, + PolicyRefs: preHealthCheckDeployment, } clusterProfile.Spec = desiredSpec @@ -1182,23 +1173,64 @@ func (r *ClusterPromotionReconciler) postDelayChecksAlreadyIncluded( } // canManualAdvance returns true if Sveltos should move to next stage based on Manual Trigger -func (r *ClusterPromotionReconciler) canManualAdvance(clusterPromotion *configv1beta1.ClusterPromotion, - currentStageName string, manualTrigger *configv1beta1.ManualTrigger, logger logr.Logger) bool { +func (r *ClusterPromotionReconciler) canManualAdvance(ctx context.Context, + clusterPromotion *configv1beta1.ClusterPromotion, currentStageName string, + manualTrigger *configv1beta1.ManualTrigger, logger logr.Logger) (bool, error) { + + if manualTrigger.Delay != nil { + requiredReadyTime, err := r.getRequiredReadyTime(clusterPromotion, currentStageName, + manualTrigger.Delay) + if err != nil { + logger.V(logs.LogDebug).Info(err.Error()) + return false, err + } + + // 2. Check the delay condition + if time.Now().Before(*requiredReadyTime) { + // The required delay time has NOT yet passed. 
+ message := fmt.Sprintf("Delayed: Waiting for Time Window: %s", + requiredReadyTime.Format(time.RFC3339)) + logger.V(logs.LogDebug).Info(message, + "stage", currentStageName, + "delay", manualTrigger.Delay.Duration.String(), + "ready_at", requiredReadyTime.Format(time.RFC3339), + ) + + updateStageDescription(clusterPromotion, currentStageName, message) + return false, nil + } + } + + // --- DELAY HAS PASSED: Deploy PreHealthCheckDeployment -- + stage := getStageSpecByName(clusterPromotion, currentStageName) + if err := r.reconcilePreHealthCheckDeployment(ctx, clusterPromotion, stage, + manualTrigger.PreHealthCheckDeployment, logger); err != nil { + return false, err + } + + // --- DELAY HAS PASSED: Reconcile Post-Delay Health Checks --- + if err := r.reconcilePostDelayHealthChecks(ctx, clusterPromotion, currentStageName, + manualTrigger.PostDelayHealthChecks, logger); err != nil { + logger.V(logs.LogDebug).Info("Running Post-Promotion Health Checks") + updateStageDescription(clusterPromotion, currentStageName, "Running Post-Promotion Health Checks") + + return false, err + } if manualTrigger.Approved == nil || !(*manualTrigger.Approved) { - message := "Paused: Awaiting Manual Approva" + message := "Paused: Awaiting Manual Approval" logger.V(logs.LogDebug).Info(message) updateStageDescription(clusterPromotion, currentStageName, message) - return false + return false, nil } if manualTrigger.AutomaticReset { manualTrigger.Approved = nil } - return true + return true, nil } func (r *ClusterPromotionReconciler) cleanClusterProfiles(ctx context.Context, @@ -1313,3 +1345,22 @@ func (r *ClusterPromotionReconciler) updateStatusWithMissingLicenseError( addStageStatus(promotionScope.ClusterPromotion, firstStage.Name) updateStageStatus(promotionScope.ClusterPromotion, firstStage.Name, false, &notEligibleError) } + +// getRequiredReadyTime returns the time when the delay is satisfied. 
+func (r *ClusterPromotionReconciler) getRequiredReadyTime(clusterPromotion *configv1beta1.ClusterPromotion, + currentStageName string, delay *metav1.Duration) (*time.Time, error) { + + currentStageStatus := getStageStatusByName(clusterPromotion, currentStageName) + if currentStageStatus == nil { + return nil, fmt.Errorf("status not present for stage %s", currentStageName) + } + + if currentStageStatus.LastSuccessfulAppliedTime == nil { + return nil, fmt.Errorf("LastSuccessfulAppliedTime not set for stage %s", currentStageName) + } + + // Calculate: Success Time + Delay Duration + requiredReadyTime := currentStageStatus.LastSuccessfulAppliedTime.Add(delay.Duration) + + return &requiredReadyTime, nil +} diff --git a/controllers/clusterpromotion_controller_test.go b/controllers/clusterpromotion_controller_test.go index 146b974c..7b37adc9 100644 --- a/controllers/clusterpromotion_controller_test.go +++ b/controllers/clusterpromotion_controller_test.go @@ -984,8 +984,9 @@ var _ = Describe("ClusterPromotionController", func() { reconciler := controllers.ClusterPromotionReconciler{Client: c} - canAdvance := controllers.CanManualAdvance(&reconciler, clusterPromotion, + canAdvance, err := controllers.CanManualAdvance(&reconciler, context.TODO(), clusterPromotion, stage1, clusterPromotion.Spec.Stages[0].Trigger.Manual, logger) + Expect(err).To(BeNil()) Expect(canAdvance).To(BeFalse()) // Approved is not set in ManualTrigger approved := true @@ -1003,8 +1004,9 @@ var _ = Describe("ClusterPromotionController", func() { }, } - canAdvance = controllers.CanManualAdvance(&reconciler, clusterPromotion, + canAdvance, err = controllers.CanManualAdvance(&reconciler, context.TODO(), clusterPromotion, stage1, clusterPromotion.Spec.Stages[0].Trigger.Manual, logger) + Expect(err).To(BeNil()) Expect(canAdvance).To(BeTrue()) // Approved is set to true in ManualTrigger }) diff --git a/manifest/manifest.yaml b/manifest/manifest.yaml index 56f0087f..b2ba5953 100644 --- 
a/manifest/manifest.yaml +++ b/manifest/manifest.yaml @@ -3727,6 +3727,193 @@ spec: to the next stage. This prevents unintended immediate advancement past the next manual stage. Defaults to true. type: boolean + delay: + description: |- + Delay is an optional time duration to wait after the WaitForStatus condition + is met before proceeding with the promotion. + type: string + postDelayHealthChecks: + description: |- + PostDelayHealthChecks is a slice of health checks Sveltos will run after the delay + period has elapsed. + items: + properties: + evaluateCEL: + description: |- + EvaluateCEL contains a list of named CEL (Common Expression Language) rules. + Each rule will be evaluated in order against each object selected based on + the criteria defined above. Each rule's expression must return a boolean value + indicating whether the object is a match. + + Evaluation stops at the first rule that returns true; subsequent + rules will not be evaluated. + items: + description: CELRule defines a named CEL rule + used in EvaluateCEL. + properties: + name: + description: Name is a human-readable identifier + for the rule. + type: string + rule: + description: |- + Rule is the CEL (Common Expression Language) expression to evaluate. + It must return a bool + type: string + required: + - name + - rule + type: object + type: array + featureID: + description: |- + FeatureID is an indentifier of the feature (Helm/Kustomize/Resources) + This field indicates when to run this check. + For instance: + - if set to Helm this check will be run after all helm + charts specified in the ClusterProfile are deployed. + - if set to Resources this check will be run after the content + of all the ConfigMaps/Secrets referenced by ClusterProfile in the + PolicyRef sections is deployed + enum: + - Resources + - Helm + - Kustomize + type: string + group: + description: Group of the resource to fetch in + the managed Cluster. 
+ type: string + kind: + description: Kind of the resource to fetch in + the managed Cluster. + minLength: 1 + type: string + labelFilters: + description: LabelFilters allows to filter resources + based on current labels. + items: + properties: + key: + description: Key is the label key + type: string + operation: + description: Operation is the comparison + operation + enum: + - Equal + - Different + - Has + - DoesNotHave + type: string + value: + description: Value is the label value + type: string + required: + - key + - operation + type: object + type: array + name: + description: Name is the name of this check + type: string + namespace: + description: |- + Namespace of the resource to fetch in the managed Cluster. + Empty for resources scoped at cluster level. + type: string + script: + description: |- + Script is a text containing a lua script. + Must return struct with field "health" + representing whether object is a match (true or false) + type: string + version: + description: Version of the resource to fetch + in the managed Cluster. + type: string + required: + - featureID + - group + - kind + - name + - version + type: object + type: array + preHealthCheckDeployment: + description: |- + PreHealthCheckDeployment is a slice of resources Sveltos will deploy after the Delay + period has elapsed and before running PostDelayHealthChecks. + This can be used, for example, to deploy a Job that performs validation tasks. + The PostDelayHealthChecks can then validate the successful completion of these resources (e.g., a Job). + items: + properties: + deploymentType: + default: Remote + description: |- + DeploymentType indicates whether resources need to be deployed + into the management cluster (local) or the managed cluster (remote) + enum: + - Local + - Remote + type: string + kind: + description: |- + Kind of the resource. 
Supported kinds are: + - ConfigMap/Secret + - flux GitRepository;OCIRepository;Bucket + enum: + - GitRepository + - OCIRepository + - Bucket + - ConfigMap + - Secret + type: string + name: + description: |- + Name of the referenced resource. + Name can be expressed as a template and instantiate using any cluster field. + minLength: 1 + type: string + namespace: + description: |- + Namespace of the referenced resource. + For ClusterProfile namespace can be left empty. In such a case, namespace will + be implicit set to cluster's namespace. + For Profile namespace must be left empty. Profile namespace will be used. + Namespace can be expressed as a template and instantiate using any cluster field. + type: string + optional: + default: false + description: |- + Optional indicates that the referenced resource is not mandatory. + If set to true and the resource is not found, the error will be ignored, + and Sveltos will continue processing other PolicyRefs. + type: boolean + path: + description: |- + Path to the directory containing the YAML files. + Defaults to 'None', which translates to the root path of the SourceRef. + Used only for GitRepository;OCIRepository;Bucket + type: string + tier: + default: 100 + description: |- + Tier controls the order of deployment for resources coming from different PolicyRefs + within the same ClusterProfile or Profile. + When two PolicyRefs attempt to deploy the same resource, the PolicyRef with the lowest + Tier value takes priority and deploys/updates the resource. + This priority mechanism is only checked after the parent ClusterProfile has won + the primary conflict resolution against other ClusterProfiles. + Higher Tier values represent lower priority. The default Tier value is 100. + format: int32 + minimum: 1 + type: integer + required: + - kind + - name + type: object + type: array type: object type: object x-kubernetes-validations: