diff --git a/internal/controller/postgrescluster_controller.go b/internal/controller/postgrescluster_controller.go index 70b11c9e6..c902222a1 100644 --- a/internal/controller/postgrescluster_controller.go +++ b/internal/controller/postgrescluster_controller.go @@ -21,7 +21,7 @@ import ( cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" - clustercore "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core" + clustercore "github.com/splunk/splunk-operator/pkg/postgresql/cluster/business/core" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index 5687ae1f8..ccf4f8812 100644 --- a/internal/controller/postgrescluster_controller_test.go +++ b/internal/controller/postgrescluster_controller_test.go @@ -35,7 +35,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" - "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core" + "github.com/splunk/splunk-operator/pkg/postgresql/cluster/business/core" ) /* diff --git a/pkg/postgresql/cluster/business/adapters/README.md b/pkg/postgresql/cluster/business/adapters/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/business/core/cluster.go similarity index 73% rename from pkg/postgresql/cluster/core/cluster.go rename to pkg/postgresql/cluster/business/core/cluster.go index e09974ec0..54bcb2344 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/business/core/cluster.go @@ -24,6 +24,8 @@ import ( cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" password "github.com/sethvargo/go-password/password" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + pgcConstants "github.com/splunk/splunk-operator/pkg/postgresql/cluster/business/core/types/constants" + "github.com/splunk/splunk-operator/pkg/postgresql/cluster/business/ports/secondary" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -38,6 +40,10 @@ import ( log "sigs.k8s.io/controller-runtime/pkg/log" ) +type PgClusterReconciler struct { + Provisioner secondary.Provisioner +} + // PostgresClusterService is the application service entry point called by the primary adapter (reconciler). func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl.Request) (ctrl.Result, error) { c := rc.Client @@ -285,6 +291,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. // Reconcile Connection Pooler. poolerEnabled = mergedConfig.Spec.ConnectionPoolerEnabled != nil && *mergedConfig.Spec.ConnectionPoolerEnabled + poolerConfigPresent := mergedConfig.CNPG != nil && mergedConfig.CNPG.ConnectionPooler != nil rwPoolerExists, err := poolerExists(ctx, c, postgresCluster, readWriteEndpoint) if err != nil { @@ -445,45 +452,475 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. } } - // Final status sync. - var oldPhase string - if postgresCluster.Status.Phase != nil { - oldPhase = *postgresCluster.Status.Phase + // Aggregate component readiness from iterative health checks. + state := pgcConstants.EmptyState + conditions := []clusterReadynessCheck{ + newProvisionerHealthCheck(postgresCluster, cnpgCluster), + newPoolerHealthCheck(c, postgresCluster, poolerEnabled, poolerConfigPresent), + newConfigMapHealthCheck(c, postgresCluster), + newSecretHealthCheck(c, postgresCluster), } - if err := syncStatus(ctx, c, postgresCluster, cnpgCluster); err != nil { - logger.Error(err, "Failed to sync status") - if apierrors.IsConflict(err) { - logger.Info("Conflict during status update, will requeue") - return ctrl.Result{Requeue: true}, nil + + for _, check := range conditions { + componentHealth, err := check.Condition(ctx) + if err != nil { + if statusErr := updateStatus(componentHealth.Condition, metav1.ConditionFalse, componentHealth.Reason, componentHealth.Message, componentHealth.Phase); statusErr != nil { + if apierrors.IsConflict(statusErr) { + return ctrl.Result{Requeue: true}, nil + } + return ctrl.Result{}, statusErr + } + logger.Error(err, "Component health check reported issues", + "component", check, + "requeueAfter", componentHealth.Result.RequeueAfter, + "reason", componentHealth.Reason) + return componentHealth.Result, err + } + + if isPendingState(componentHealth.State) { + if statusErr := updateStatus(componentHealth.Condition, metav1.ConditionFalse, componentHealth.Reason, componentHealth.Message, componentHealth.Phase); statusErr != nil { + if apierrors.IsConflict(statusErr) { + return ctrl.Result{Requeue: true}, nil + } + return ctrl.Result{}, statusErr + } + return componentHealth.Result, nil } - return ctrl.Result{}, fmt.Errorf("failed to sync status: %w", err) + state |= componentHealth.State } - var newPhase string - if postgresCluster.Status.Phase != nil { - newPhase = *postgresCluster.Status.Phase + + if state&pgcConstants.ComponentsReady == pgcConstants.ComponentsReady { + logger.Info("Reconciliation complete") + if err := updateStatus(clusterReady, metav1.ConditionTrue, reasonCNPGClusterHealthy, msgAllComponentsReady, readyClusterPhase); err != nil { + if apierrors.IsConflict(err) { + return ctrl.Result{Requeue: true}, nil + } + return ctrl.Result{}, err + } + return ctrl.Result{}, nil } - rc.emitClusterPhaseTransition(postgresCluster, oldPhase, newPhase) - if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy { - rwPooler := &cnpgv1.Pooler{} - rwErr := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(postgresCluster.Name, readWriteEndpoint), - Namespace: postgresCluster.Namespace, - }, rwPooler) - roPooler := &cnpgv1.Pooler{} - roErr := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(postgresCluster.Name, readOnlyEndpoint), - Namespace: postgresCluster.Namespace, - }, roPooler) - if rwErr == nil && roErr == nil && arePoolersReady(rwPooler, roPooler) { - logger.Info("Poolers ready, syncing status") - poolerOldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) - copy(poolerOldConditions, postgresCluster.Status.Conditions) - _ = syncPoolerStatus(ctx, c, postgresCluster) - rc.emitPoolerReadyTransition(postgresCluster, poolerOldConditions) + return ctrl.Result{RequeueAfter: retryDelay}, nil +} + +// Free to place in specific dir/place along with the p&a work. +type StateInformationDto struct { + State pgcConstants.State + Condition conditionTypes + Reason conditionReasons + Message string + Phase reconcileClusterPhases + Result ctrl.Result +} + +// a unit of work in a way, extractable. +type clusterReadynessCheck interface { + Condition(ctx context.Context) (StateInformationDto, error) +} + +type provisionerHealthCheck struct { + cluster *enterprisev4.PostgresCluster + cnpgCluster *cnpgv1.Cluster +} + +func newProvisionerHealthCheck(cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) *provisionerHealthCheck { + return &provisionerHealthCheck{cluster: cluster, cnpgCluster: cnpgCluster} +} + +func (c *provisionerHealthCheck) Condition(_ context.Context) (StateInformationDto, error) { + if c.cnpgCluster != nil { + c.cluster.Status.ProvisionerRef = &corev1.ObjectReference{ + APIVersion: "postgresql.cnpg.io/v1", + Kind: "Cluster", + Namespace: c.cnpgCluster.Namespace, + Name: c.cnpgCluster.Name, + UID: c.cnpgCluster.UID, } } - logger.Info("Reconciliation complete") - return ctrl.Result{}, nil + + info := StateInformationDto{Condition: clusterReady} + + if c.cnpgCluster == nil { + info.State = pgcConstants.ProvisionerPending + info.Reason = reasonCNPGProvisioning + info.Message = msgCNPGPendingCreation + info.Phase = pendingClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + } + + switch c.cnpgCluster.Status.Phase { + case cnpgv1.PhaseHealthy: + info.State = pgcConstants.ProvisionerReady + info.Reason = reasonCNPGClusterHealthy + info.Message = msgProvisionerHealthy + info.Phase = readyClusterPhase + return info, nil + case cnpgv1.PhaseFirstPrimary, cnpgv1.PhaseCreatingReplica, cnpgv1.PhaseWaitingForInstancesToBeActive: + info.State = pgcConstants.ProvisionerProvisioning + info.Reason = reasonCNPGProvisioning + info.Message = fmt.Sprintf(msgFmtCNPGProvisioning, c.cnpgCluster.Status.Phase) + info.Phase = provisioningClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseSwitchover: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGSwitchover + info.Message = msgCNPGSwitchover + info.Phase = configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseFailOver: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGFailingOver + info.Message = msgCNPGFailingOver + info.Phase = configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseInplacePrimaryRestart, cnpgv1.PhaseInplaceDeletePrimaryRestart: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGRestarting + info.Message = fmt.Sprintf(msgFmtCNPGRestarting, c.cnpgCluster.Status.Phase) + info.Phase = configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseUpgrade, cnpgv1.PhaseMajorUpgrade, cnpgv1.PhaseUpgradeDelayed, cnpgv1.PhaseOnlineUpgrading: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGUpgrading + info.Message = fmt.Sprintf(msgFmtCNPGUpgrading, c.cnpgCluster.Status.Phase) + info.Phase = configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseApplyingConfiguration: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGApplyingConfig + info.Message = msgCNPGApplyingConfiguration + info.Phase = configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseReplicaClusterPromotion: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGPromoting + info.Message = msgCNPGPromoting + info.Phase = configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseWaitingForUser: + info.State = pgcConstants.ProvisionerFailed + info.Reason = reasonCNPGWaitingForUser + info.Message = msgCNPGWaitingForUser + info.Phase = failedClusterPhase + return info, fmt.Errorf("provisioner requires user action") + case cnpgv1.PhaseUnrecoverable: + info.State = pgcConstants.ProvisionerFailed + info.Reason = reasonCNPGUnrecoverable + info.Message = msgCNPGUnrecoverable + info.Phase = failedClusterPhase + return info, fmt.Errorf("provisioner unrecoverable") + case cnpgv1.PhaseCannotCreateClusterObjects: + info.State = pgcConstants.ProvisionerFailed + info.Reason = reasonCNPGProvisioningFailed + info.Message = msgCNPGCannotCreateObjects + info.Phase = failedClusterPhase + return info, fmt.Errorf("provisioner cannot create cluster objects") + case cnpgv1.PhaseUnknownPlugin, cnpgv1.PhaseFailurePlugin: + info.State = pgcConstants.ProvisionerFailed + info.Reason = reasonCNPGPluginError + info.Message = fmt.Sprintf(msgFmtCNPGPluginError, c.cnpgCluster.Status.Phase) + info.Phase = failedClusterPhase + return info, fmt.Errorf("provisioner plugin error") + case cnpgv1.PhaseImageCatalogError, cnpgv1.PhaseArchitectureBinaryMissing: + info.State = pgcConstants.ProvisionerFailed + info.Reason = reasonCNPGImageError + info.Message = fmt.Sprintf(msgFmtCNPGImageError, c.cnpgCluster.Status.Phase) + info.Phase = failedClusterPhase + return info, fmt.Errorf("provisioner image error") + case "": + info.State = pgcConstants.ProvisionerPending + info.Reason = reasonCNPGProvisioning + info.Message = msgCNPGPendingCreation + info.Phase = pendingClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + default: + info.State = pgcConstants.ProvisionerProvisioning + info.Reason = reasonCNPGProvisioning + info.Message = fmt.Sprintf(msgFmtCNPGClusterPhase, c.cnpgCluster.Status.Phase) + info.Phase = provisioningClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + } +} + +type poolerHealthCheck struct { + client client.Client + cluster *enterprisev4.PostgresCluster + poolerEnabled bool + poolerConfigPresent bool +} + +func newPoolerHealthCheck(c client.Client, cluster *enterprisev4.PostgresCluster, poolerEnabled bool, poolerConfigPresent bool) *poolerHealthCheck { + return &poolerHealthCheck{ + client: c, + cluster: cluster, + poolerEnabled: poolerEnabled, + poolerConfigPresent: poolerConfigPresent, + } +} + +func (p *poolerHealthCheck) Condition(ctx context.Context) (StateInformationDto, error) { + if !p.poolerEnabled { + return StateInformationDto{ + State: pgcConstants.PoolerReady, + Condition: poolerReady, + Reason: reasonAllInstancesReady, + Message: msgPoolerDisabled, + Phase: readyClusterPhase, + }, nil + } + if !p.poolerConfigPresent { + return StateInformationDto{ + State: pgcConstants.PoolerFailed, + Condition: poolerReady, + Reason: reasonPoolerConfigMissing, + Message: msgPoolerConfigMissing, + Phase: failedClusterPhase, + }, fmt.Errorf("pooler config missing") + } + + // TODO: Port material. + rwExists, err := poolerExists(ctx, p.client, p.cluster, readWriteEndpoint) + if err != nil { + return StateInformationDto{ + State: pgcConstants.PoolerFailed, + Condition: poolerReady, + Reason: reasonPoolerReconciliationFailed, + Message: fmt.Sprintf("Failed to check RW pooler existence: %v", err), + Phase: failedClusterPhase, + }, err + } + roExists, err := poolerExists(ctx, p.client, p.cluster, readOnlyEndpoint) + if err != nil { + return StateInformationDto{ + State: pgcConstants.PoolerFailed, + Condition: poolerReady, + Reason: reasonPoolerReconciliationFailed, + Message: fmt.Sprintf("Failed to check RO pooler existence: %v", err), + Phase: failedClusterPhase, + }, err + } + if !rwExists || !roExists { + return StateInformationDto{ + State: pgcConstants.PoolerProvisioning, + Condition: poolerReady, + Reason: reasonPoolerCreating, + Message: msgPoolersProvisioning, + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + + rwPooler := &cnpgv1.Pooler{} + if err := p.client.Get(ctx, types.NamespacedName{ + Name: poolerResourceName(p.cluster.Name, readWriteEndpoint), + Namespace: p.cluster.Namespace, + }, rwPooler); err != nil { + return StateInformationDto{ + State: pgcConstants.PoolerPending, + Condition: poolerReady, + Reason: reasonPoolerCreating, + Message: msgWaitRWPoolerObject, + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + roPooler := &cnpgv1.Pooler{} + if err := p.client.Get(ctx, types.NamespacedName{ + Name: poolerResourceName(p.cluster.Name, readOnlyEndpoint), + Namespace: p.cluster.Namespace, + }, roPooler); err != nil { + return StateInformationDto{ + State: pgcConstants.PoolerPending, + Condition: poolerReady, + Reason: reasonPoolerCreating, + Message: msgWaitROPoolerObject, + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + if !arePoolersReady(rwPooler, roPooler) { + return StateInformationDto{ + State: pgcConstants.PoolerPending, + Condition: poolerReady, + Reason: reasonPoolerCreating, + Message: msgPoolersNotReady, + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + + return StateInformationDto{ + State: pgcConstants.PoolerReady, + Condition: poolerReady, + Reason: reasonAllInstancesReady, + Message: msgPoolersReady, + Phase: readyClusterPhase, + }, nil +} + +type configMapHealthCheck struct { + client client.Client + cluster *enterprisev4.PostgresCluster +} + +func newConfigMapHealthCheck(c client.Client, cluster *enterprisev4.PostgresCluster) *configMapHealthCheck { + return &configMapHealthCheck{client: c, cluster: cluster} +} + +func (c *configMapHealthCheck) Condition(ctx context.Context) (StateInformationDto, error) { + if c.cluster.Status.Resources == nil || c.cluster.Status.Resources.ConfigMapRef == nil { + return StateInformationDto{ + State: pgcConstants.ConfigMapProvisioning, + Condition: clusterReady, + Reason: reasonConfigMapFailed, + Message: msgConfigMapRefNotPublished, + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + + cm := &corev1.ConfigMap{} + key := types.NamespacedName{Name: c.cluster.Status.Resources.ConfigMapRef.Name, Namespace: c.cluster.Namespace} + if err := c.client.Get(ctx, key, cm); err != nil { + if apierrors.IsNotFound(err) { + return StateInformationDto{ + State: pgcConstants.ConfigMapProvisioning, + Condition: clusterReady, + Reason: reasonConfigMapFailed, + Message: msgConfigMapNotFoundYet, + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + return StateInformationDto{ + State: pgcConstants.ConfigMapFailed, + Condition: clusterReady, + Reason: reasonConfigMapFailed, + Message: fmt.Sprintf("Failed to fetch ConfigMap: %v", err), + Phase: failedClusterPhase, + }, err + } + + requiredKeys := []string{ + configKeyClusterRWEndpoint, + configKeyClusterROEndpoint, + configKeyDefaultClusterPort, + configKeySuperUserSecretRef, + } + for _, requiredKey := range requiredKeys { + if _, ok := cm.Data[requiredKey]; !ok { + return StateInformationDto{ + State: pgcConstants.ConfigMapFailed, + Condition: clusterReady, + Reason: reasonConfigMapFailed, + Message: fmt.Sprintf(msgFmtConfigMapMissingRequiredKey, requiredKey), + Phase: failedClusterPhase, + }, fmt.Errorf("configmap missing key %s", requiredKey) + } + } + + return StateInformationDto{ + State: pgcConstants.ConfigMapReady, + Condition: clusterReady, + Reason: reasonClusterBuildSucceeded, + Message: msgAccessConfigMapReady, + Phase: readyClusterPhase, + }, nil +} + +type secretHealthCheck struct { + client client.Client + cluster *enterprisev4.PostgresCluster +} + +func newSecretHealthCheck(c client.Client, cluster *enterprisev4.PostgresCluster) *secretHealthCheck { + return &secretHealthCheck{client: c, cluster: cluster} +} + +func (s *secretHealthCheck) Condition(ctx context.Context) (StateInformationDto, error) { + if s.cluster.Status.Resources == nil || s.cluster.Status.Resources.SuperUserSecretRef == nil { + return StateInformationDto{ + State: pgcConstants.SecretProvisioning, + Condition: clusterReady, + Reason: reasonUserSecretFailed, + Message: msgSecretRefNotPublished, + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + + secret := &corev1.Secret{} + key := types.NamespacedName{Name: s.cluster.Status.Resources.SuperUserSecretRef.Name, Namespace: s.cluster.Namespace} + if err := s.client.Get(ctx, key, secret); err != nil { + if apierrors.IsNotFound(err) { + return StateInformationDto{ + State: pgcConstants.SecretProvisioning, + Condition: clusterReady, + Reason: reasonUserSecretFailed, + Message: msgSecretNotFoundYet, + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + return StateInformationDto{ + State: pgcConstants.SecretFailed, + Condition: clusterReady, + Reason: reasonUserSecretFailed, + Message: fmt.Sprintf("Failed to fetch superuser secret: %v", err), + Phase: failedClusterPhase, + }, err + } + + refKey := s.cluster.Status.Resources.SuperUserSecretRef.Key + if refKey == "" { + refKey = secretKeyPassword + } + if _, ok := secret.Data[refKey]; !ok { + return StateInformationDto{ + State: pgcConstants.SecretFailed, + Condition: clusterReady, + Reason: reasonSuperUserSecretFailed, + Message: fmt.Sprintf(msgFmtSecretMissingKey, refKey), + Phase: failedClusterPhase, + }, fmt.Errorf("secret missing key %s", refKey) + } + + return StateInformationDto{ + State: pgcConstants.SecretReady, + Condition: clusterReady, + Reason: reasonClusterBuildSucceeded, + Message: msgSuperuserSecretReady, + Phase: readyClusterPhase, + }, nil +} + +func isPendingState(state pgcConstants.State) bool { + switch state { + case pgcConstants.PoolerPending, + pgcConstants.PoolerProvisioning, + pgcConstants.PoolerConfiguring, + pgcConstants.ProvisionerPending, + pgcConstants.ProvisionerProvisioning, + pgcConstants.ProvisionerConfiguring, + pgcConstants.ConfigMapPending, + pgcConstants.ConfigMapProvisioning, + pgcConstants.ConfigMapConfiguring, + pgcConstants.SecretPending, + pgcConstants.SecretProvisioning, + pgcConstants.SecretConfiguring: + return true + default: + return false + } } // getMergedConfig overlays PostgresCluster spec on top of the class defaults. @@ -781,63 +1218,6 @@ func syncPoolerStatus(ctx context.Context, c client.Client, cluster *enterprisev readyClusterPhase) } -// syncStatus maps CNPG Cluster state to PostgresCluster status. -func syncStatus(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) error { - cluster.Status.ProvisionerRef = &corev1.ObjectReference{ - APIVersion: "postgresql.cnpg.io/v1", - Kind: "Cluster", - Namespace: cnpgCluster.Namespace, - Name: cnpgCluster.Name, - UID: cnpgCluster.UID, - } - - var phase reconcileClusterPhases - var condStatus metav1.ConditionStatus - var reason conditionReasons - var message string - - switch cnpgCluster.Status.Phase { - case cnpgv1.PhaseHealthy: - phase, condStatus, reason, message = readyClusterPhase, metav1.ConditionTrue, reasonCNPGClusterHealthy, "Cluster is up and running" - case cnpgv1.PhaseFirstPrimary, cnpgv1.PhaseCreatingReplica, cnpgv1.PhaseWaitingForInstancesToBeActive: - phase, condStatus, reason = provisioningClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioning - message = fmt.Sprintf("CNPG cluster provisioning: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseSwitchover: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGSwitchover, "Cluster changing primary node" - case cnpgv1.PhaseFailOver: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGFailingOver, "Pod missing, need to change primary" - case cnpgv1.PhaseInplacePrimaryRestart, cnpgv1.PhaseInplaceDeletePrimaryRestart: - phase, condStatus, reason = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGRestarting - message = fmt.Sprintf("CNPG cluster restarting: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseUpgrade, cnpgv1.PhaseMajorUpgrade, cnpgv1.PhaseUpgradeDelayed, cnpgv1.PhaseOnlineUpgrading: - phase, condStatus, reason = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGUpgrading - message = fmt.Sprintf("CNPG cluster upgrading: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseApplyingConfiguration: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGApplyingConfig, "Configuration change is being applied" - case cnpgv1.PhaseReplicaClusterPromotion: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGPromoting, "Replica is being promoted to primary" - case cnpgv1.PhaseWaitingForUser: - phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGWaitingForUser, "Action from the user is required" - case cnpgv1.PhaseUnrecoverable: - phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGUnrecoverable, "Cluster failed, needs manual intervention" - case cnpgv1.PhaseCannotCreateClusterObjects: - phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioningFailed, "Cluster resources cannot be created" - case cnpgv1.PhaseUnknownPlugin, cnpgv1.PhaseFailurePlugin: - phase, condStatus, reason = failedClusterPhase, metav1.ConditionFalse, reasonCNPGPluginError - message = fmt.Sprintf("CNPG plugin error: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseImageCatalogError, cnpgv1.PhaseArchitectureBinaryMissing: - phase, condStatus, reason = failedClusterPhase, metav1.ConditionFalse, reasonCNPGImageError - message = fmt.Sprintf("CNPG image error: %s", cnpgCluster.Status.Phase) - case "": - phase, condStatus, reason, message = pendingClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioning, "CNPG cluster is pending creation" - default: - phase, condStatus, reason = provisioningClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioning - message = fmt.Sprintf("CNPG cluster phase: %s", cnpgCluster.Status.Phase) - } - - return setStatus(ctx, c, cluster, clusterReady, condStatus, reason, message, phase) -} - // setStatus sets the phase, condition and persists the status. // It skips the API write when the resulting status is identical to the current // state, avoiding unnecessary etcd churn and ResourceVersion bumps on stable clusters. diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/business/core/cluster_unit_test.go similarity index 88% rename from pkg/postgresql/cluster/core/cluster_unit_test.go rename to pkg/postgresql/cluster/business/core/cluster_unit_test.go index e2466f54b..c7a2ee25b 100644 --- a/pkg/postgresql/cluster/core/cluster_unit_test.go +++ b/pkg/postgresql/cluster/business/core/cluster_unit_test.go @@ -6,6 +6,7 @@ import ( cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + pgcConstants "github.com/splunk/splunk-operator/pkg/postgresql/cluster/business/core/types/constants" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" @@ -13,11 +14,23 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/utils/ptr" client "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) +type configMapNotFoundClient struct { + client.Client +} + +func (c configMapNotFoundClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if _, ok := obj.(*corev1.ConfigMap); ok { + return apierrors.NewNotFound(schema.GroupResource{Resource: "configmaps"}, key.Name) + } + return c.Client.Get(ctx, key, obj, opts...) +} + func TestPoolerResourceName(t *testing.T) { tests := []struct { name string @@ -1136,3 +1149,125 @@ func TestCreateOrUpdateConnectionPoolers(t *testing.T) { assert.Equal(t, int32(1), *ro.Spec.Instances) }) } + +func TestComponentStateTriggerConditions(t *testing.T) { + t.Parallel() + + ctx := t.Context() + scheme := runtime.NewScheme() + require.NoError(t, corev1.AddToScheme(scheme)) + + exampleCm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-config", + Namespace: "default", + }, + Data: map[string]string{ + "CLUSTER_RW_ENDPOINT": "pg1-rw.default", + "CLUSTER_RO_ENDPOINT": "pg1-ro.default", + "DEFAULT_CLUSTER_PORT": "5432", + "SUPER_USER_SECRET_REF": "pg1-secret", + }, + } + examplePgCluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1", + Namespace: "default", + }, + Status: enterprisev4.PostgresClusterStatus{ + Resources: &enterprisev4.PostgresClusterResources{ + ConfigMapRef: &corev1.LocalObjectReference{Name: "pg1-config"}, + SuperUserSecretRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "pg1-secret"}, + Key: "password", + }, + }, + }, + } + exampleSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-secret", + Namespace: "default", + }, + Data: map[string][]byte{ + "password": []byte("s3cr3t"), + }, + } + + // TODO: as soon as coupling is addressed, remove this monster of a test. + combinations := []struct { + name string + componentChecks []clusterReadynessCheck + requeue []bool + expectedResult bool + message string + }{ + { + name: "Provisioner ready, pooler pending, sync not successful", + componentChecks: []clusterReadynessCheck{ + newProvisionerHealthCheck(examplePgCluster.DeepCopy(), &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), + newPoolerHealthCheck(nil, nil, true, false), + }, + requeue: []bool{false, false}, + expectedResult: false, + message: "Provisioner is ready but pooler is pending, don't fire", + }, + { + name: "Provisioner ready, pooler ready, configMap failed, sync not successful", + componentChecks: []clusterReadynessCheck{ + newProvisionerHealthCheck(examplePgCluster, &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), + newPoolerHealthCheck(nil, nil, false, false), + newConfigMapHealthCheck( + configMapNotFoundClient{ + Client: fake.NewClientBuilder(). + WithScheme(scheme). + Build(), + }, + examplePgCluster.DeepCopy(), + ), + }, + requeue: []bool{false, false, true}, + expectedResult: false, + message: "Provisioner and pooler ready are not enough when ConfigMap check returns NotFound/pending", + }, + { + name: "Sync successful, all components ready.", + componentChecks: []clusterReadynessCheck{ + newProvisionerHealthCheck(examplePgCluster, &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), + newPoolerHealthCheck(nil, nil, false, false), + newConfigMapHealthCheck( + fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(exampleCm). + Build(), + examplePgCluster.DeepCopy(), + ), + newSecretHealthCheck( + fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(exampleSecret). + Build(), + examplePgCluster.DeepCopy(), + ), + }, + requeue: []bool{false, false, false, false}, + expectedResult: true, + message: "", + }, + } + + for _, tt := range combinations { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + state := pgcConstants.EmptyState + for i, check := range tt.componentChecks { + info, _ := check.Condition(ctx) + state |= info.State + assert.Equal(t, tt.requeue[i], info.Result.RequeueAfter > 0) + } + assert.Equal(t, tt.expectedResult, state&pgcConstants.ComponentsReady == pgcConstants.ComponentsReady, + tt.message) + }) + } +} diff --git a/pkg/postgresql/cluster/core/events.go b/pkg/postgresql/cluster/business/core/events.go similarity index 100% rename from pkg/postgresql/cluster/core/events.go rename to pkg/postgresql/cluster/business/core/events.go diff --git a/pkg/postgresql/cluster/core/types.go b/pkg/postgresql/cluster/business/core/types.go similarity index 60% rename from pkg/postgresql/cluster/core/types.go rename to pkg/postgresql/cluster/business/core/types.go index 042a5ae82..b8a83c90d 100644 --- a/pkg/postgresql/cluster/core/types.go +++ b/pkg/postgresql/cluster/business/core/types.go @@ -42,6 +42,7 @@ type MergedConfig struct { type reconcileClusterPhases string type conditionTypes string type conditionReasons string +type statusMessage = string type objectKind string const ( @@ -53,6 +54,14 @@ const ( defaultDatabaseName string = "postgres" superUsername string = "postgres" defaultPort string = "5432" + configKeyClusterRWEndpoint string = "CLUSTER_RW_ENDPOINT" + configKeyClusterROEndpoint string = "CLUSTER_RO_ENDPOINT" + configKeyClusterREndpoint string = "CLUSTER_R_ENDPOINT" + configKeyDefaultClusterPort string = "DEFAULT_CLUSTER_PORT" + configKeySuperUserName string = "SUPER_USER_NAME" + configKeySuperUserSecretRef string = "SUPER_USER_SECRET_REF" + configKeyPoolerRWEndpoint string = "CLUSTER_POOLER_RW_ENDPOINT" + configKeyPoolerROEndpoint string = "CLUSTER_POOLER_RO_ENDPOINT" secretKeyPassword string = "password" defaultSecretSuffix string = "-secret" @@ -111,4 +120,39 @@ const ( reasonCNPGProvisioningFailed conditionReasons = "CNPGProvisioningFailed" reasonCNPGPluginError conditionReasons = "CNPGPluginError" reasonCNPGImageError conditionReasons = "CNPGImageError" + + // status messages — provisioner health check + msgProvisionerHealthy statusMessage = "Provisioner cluster is healthy" + msgCNPGPendingCreation statusMessage = "CNPG cluster is pending creation" + msgFmtCNPGProvisioning statusMessage = "CNPG cluster provisioning: %s" + msgCNPGSwitchover statusMessage = "Cluster changing primary node" + msgCNPGFailingOver statusMessage = "Pod missing, need to change primary" + msgFmtCNPGRestarting statusMessage = "CNPG cluster restarting: %s" + msgFmtCNPGUpgrading statusMessage = "CNPG cluster upgrading: %s" + msgCNPGApplyingConfiguration statusMessage = "Configuration change is being applied" + msgCNPGPromoting statusMessage = "Replica is being promoted to primary" + msgCNPGWaitingForUser statusMessage = "Action from the user is required" + msgCNPGUnrecoverable statusMessage = "Cluster failed, needs manual intervention" + msgCNPGCannotCreateObjects statusMessage = "Cluster resources cannot be created" + msgFmtCNPGPluginError statusMessage = "CNPG plugin error: %s" + msgFmtCNPGImageError statusMessage = "CNPG image error: %s" + msgFmtCNPGClusterPhase statusMessage = "CNPG cluster phase: %s" + + // status messages — aggregate and component readiness checks + msgAllComponentsReady statusMessage = "All components are ready" + msgPoolerDisabled statusMessage = "Connection pooler disabled" + msgPoolerConfigMissing statusMessage = "Connection pooler enabled but configuration is missing" + msgPoolersProvisioning statusMessage = "Connection poolers are being provisioned" + msgWaitRWPoolerObject statusMessage = "Waiting for RW pooler object" + msgWaitROPoolerObject statusMessage = "Waiting for RO pooler object" + msgPoolersNotReady statusMessage = "Connection poolers are not ready yet" + msgPoolersReady statusMessage = "Connection poolers are ready" + msgConfigMapRefNotPublished statusMessage = "ConfigMap reference not published yet" + msgConfigMapNotFoundYet statusMessage = "ConfigMap not found yet" + msgFmtConfigMapMissingRequiredKey statusMessage = "ConfigMap missing required key %q" + msgAccessConfigMapReady statusMessage = "Access ConfigMap is ready" + msgSecretRefNotPublished statusMessage = "Superuser secret reference not published yet" + msgSecretNotFoundYet statusMessage = "Superuser secret not found yet" + msgFmtSecretMissingKey statusMessage = "Superuser secret missing key %q" + msgSuperuserSecretReady statusMessage = "Superuser secret is ready" ) diff --git a/pkg/postgresql/cluster/business/core/types/README.md b/pkg/postgresql/cluster/business/core/types/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/pkg/postgresql/cluster/business/core/types/constants/reasons.go b/pkg/postgresql/cluster/business/core/types/constants/reasons.go new file mode 100644 index 000000000..6c53598d7 --- /dev/null +++ b/pkg/postgresql/cluster/business/core/types/constants/reasons.go @@ -0,0 +1,40 @@ +package pgcConstants + +type Reason string + +const ( + // condition reasons — clusterReady + reasonClusterClassNotFound Reason = "ClusterClassNotFound" + reasonManagedRolesFailed Reason = "ManagedRolesReconciliationFailed" + reasonClusterBuildFailed Reason = "ClusterBuildFailed" + reasonClusterBuildSucceeded Reason = "ClusterBuildSucceeded" + reasonClusterGetFailed Reason = "ClusterGetFailed" + reasonClusterPatchFailed Reason = "ClusterPatchFailed" + reasonInvalidConfiguration Reason = "InvalidConfiguration" + reasonConfigMapFailed Reason = "ConfigMapReconciliationFailed" + reasonUserSecretFailed Reason = "UserSecretReconciliationFailed" + reasonSuperUserSecretFailed Reason = "SuperUserSecretFailed" + reasonClusterDeleteFailed Reason = "ClusterDeleteFailed" + + // condition reasons — poolerReady + reasonPoolerReconciliationFailed Reason = "PoolerReconciliationFailed" + reasonPoolerConfigMissing Reason = "PoolerConfigMissing" + reasonPoolerCreating Reason = "PoolerCreating" + reasonAllInstancesReady Reason = "AllInstancesReady" + + // condition reasons — Provisioner cluster phase mapping + reasonProvisionerClusterNotHealthy Reason = "ClusterNotHealthy" + reasonProvisionerClusterHealthy Reason = "ClusterHealthy" + reasonProvisionerProvisioning Reason = "ClusterProvisioning" + reasonProvisionerSwitchover Reason = "Switchover" + reasonProvisionerFailingOver Reason = "FailingOver" + reasonProvisionerRestarting Reason = "Restarting" + reasonProvisionerUpgrading Reason = "Upgrading" + reasonProvisionerApplyingConfig Reason = "ApplyingConfiguration" + reasonProvisionerPromoting Reason = "Promoting" + reasonProvisionerWaitingForUser Reason = "WaitingForUser" + reasonProvisionerUnrecoverable Reason = "Unrecoverable" + reasonProvisionerProvisioningFailed Reason = "ProvisioningFailed" + reasonProvisionerPluginError Reason = "PluginError" + reasonProvisionerImageError Reason = "ImageError" +) diff --git a/pkg/postgresql/cluster/business/core/types/constants/state.go b/pkg/postgresql/cluster/business/core/types/constants/state.go new file mode 100644 index 000000000..bf19698c6 --- /dev/null +++ b/pkg/postgresql/cluster/business/core/types/constants/state.go @@ -0,0 +1,41 @@ +package pgcConstants + +type State uint64 + +const ( + EmptyState State = 0 + PoolerReady State = 1 << iota + PoolerPending + PoolerProvisioning + PoolerConfiguring + PoolerFailed + + ProvisionerReady + ProvisionerPending + ProvisionerProvisioning + ProvisionerConfiguring + ProvisionerFailed + + ConfigMapReady + ConfigMapPending + ConfigMapProvisioning + ConfigMapConfiguring + ConfigMapFailed + + SecretReady + SecretPending + SecretProvisioning + SecretConfiguring + SecretFailed + + ClusterReady + ClusterPending + ClusterProvisioning + ClusterConfiguring + ClusterFailed +) + +const ( + ComponentsReady = PoolerReady | ProvisionerReady | SecretReady | ConfigMapReady + OwnershipReady +) diff --git a/pkg/postgresql/cluster/business/core/types/dto/provisioner_state.go b/pkg/postgresql/cluster/business/core/types/dto/provisioner_state.go new file mode 100644 index 000000000..6ab86cb3c --- /dev/null +++ b/pkg/postgresql/cluster/business/core/types/dto/provisioner_state.go @@ -0,0 +1,3 @@ +package pgcDto + +type ProvisionerState struct{} diff --git a/pkg/postgresql/cluster/business/ports/secondary/provisioner.go b/pkg/postgresql/cluster/business/ports/secondary/provisioner.go new file mode 100644 index 000000000..6b7395fc8 --- /dev/null +++ b/pkg/postgresql/cluster/business/ports/secondary/provisioner.go @@ -0,0 +1,12 @@ +package secondary + +import ( + pgcConstants "github.com/splunk/splunk-operator/pkg/postgresql/cluster/business/core/types/constants" +) + +type Provisioner interface { + PrepareSpec() + Build() error + Await() error + State() (pgcConstants.State, pgcConstants.Reason, error) +} diff --git a/pkg/postgresql/cluster/service/reconciler.go b/pkg/postgresql/cluster/service/reconciler.go new file mode 100644 index 000000000..c4833dd20 --- /dev/null +++ b/pkg/postgresql/cluster/service/reconciler.go @@ -0,0 +1 @@ +package gpClusterService diff --git a/pkg/postgresql/cluster/service/reconciler_test.go b/pkg/postgresql/cluster/service/reconciler_test.go new file mode 100644 index 000000000..c4833dd20 --- /dev/null +++ b/pkg/postgresql/cluster/service/reconciler_test.go @@ -0,0 +1 @@ +package gpClusterService