diff --git a/api/v4/zz_generated.deepcopy.go b/api/v4/zz_generated.deepcopy.go index c698411c7..722cbf214 100644 --- a/api/v4/zz_generated.deepcopy.go +++ b/api/v4/zz_generated.deepcopy.go @@ -1588,11 +1588,6 @@ func (in *PostgresDatabaseStatus) DeepCopyInto(out *PostgresDatabaseStatus) { (*in)[i].DeepCopyInto(&(*out)[i]) } } - if in.ObservedGeneration != nil { - in, out := &in.ObservedGeneration, &out.ObservedGeneration - *out = new(int64) - **out = **in - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresDatabaseStatus. diff --git a/internal/controller/postgresdatabase_controller.go b/internal/controller/postgresdatabase_controller.go index 0c6db9628..9695d7c47 100644 --- a/internal/controller/postgresdatabase_controller.go +++ b/internal/controller/postgresdatabase_controller.go @@ -18,7 +18,6 @@ package controller import ( "context" - "reflect" cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" @@ -26,6 +25,7 @@ import ( dbcore "github.com/splunk/splunk-operator/pkg/postgresql/database/core" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -95,31 +95,45 @@ func (r *PostgresDatabaseReconciler) SetupWithManager(mgr ctrl.Manager) error { return err } return ctrl.NewControllerManagedBy(mgr). - For(&enterprisev4.PostgresDatabase{}, builder.WithPredicates( - predicate.Or( - predicate.GenerationChangedPredicate{}, - predicate.Funcs{ - UpdateFunc: func(e event.UpdateEvent) bool { - return !reflect.DeepEqual( - e.ObjectOld.GetFinalizers(), - e.ObjectNew.GetFinalizers(), - ) - }, - }, - ), - )). - Owns(&cnpgv1.Database{}, builder.WithPredicates(predicate.Funcs{ - CreateFunc: func(event.CreateEvent) bool { return false }, - })). - Owns(&corev1.Secret{}, builder.WithPredicates(predicate.Funcs{ - CreateFunc: func(event.CreateEvent) bool { return false }, - })). - Owns(&corev1.ConfigMap{}, builder.WithPredicates(predicate.Funcs{ - CreateFunc: func(event.CreateEvent) bool { return false }, - })). + WithEventFilter(predicate.Funcs{GenericFunc: func(event.GenericEvent) bool { return false }}). + For(&enterprisev4.PostgresDatabase{}, builder.WithPredicates(postgresDatabasePredicator())). + Owns(&cnpgv1.Database{}, builder.WithPredicates(postgresDatabaseCNPGDatabasePredicator())). + Owns(&corev1.Secret{}, builder.WithPredicates(predicate.ResourceVersionChangedPredicate{})). + Owns(&corev1.ConfigMap{}, builder.WithPredicates(predicate.ResourceVersionChangedPredicate{})). Named("postgresdatabase"). WithOptions(controller.Options{ MaxConcurrentReconciles: DatabaseTotalWorker, }). Complete(r) } + +func postgresDatabasePredicator() predicate.Predicate { + return predicate.Or( + predicate.GenerationChangedPredicate{}, + predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + if !equality.Semantic.DeepEqual(e.ObjectOld.GetDeletionTimestamp(), e.ObjectNew.GetDeletionTimestamp()) { + return true + } + return !equality.Semantic.DeepEqual(e.ObjectOld.GetFinalizers(), e.ObjectNew.GetFinalizers()) + }, + }, + ) +} + +func postgresDatabaseCNPGDatabasePredicator() predicate.Predicate { + return predicate.Or( + predicate.GenerationChangedPredicate{}, + predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + oldObj, okOld := e.ObjectOld.(*cnpgv1.Database) + newObj, okNew := e.ObjectNew.(*cnpgv1.Database) + if !okOld || !okNew { + return true + } + return !equality.Semantic.DeepEqual(oldObj.Status.Applied, newObj.Status.Applied) || + ownerReferencesChanged(oldObj, newObj) + }, + }, + ) +} diff --git a/internal/controller/postgresdatabase_controller_test.go b/internal/controller/postgresdatabase_controller_test.go index a1f5ed9ba..1cf7ffbdc 100644 --- a/internal/controller/postgresdatabase_controller_test.go +++ b/internal/controller/postgresdatabase_controller_test.go @@ -35,6 +35,8 @@ import ( "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -334,14 +336,41 @@ func expectStatusCondition(current *enterprisev4.PostgresDatabase, conditionType func expectReadyStatus(current *enterprisev4.PostgresDatabase, generation int64, expectedDatabase enterprisev4.DatabaseInfo) { expectStatusPhase(current, "Ready") - Expect(current.Status.ObservedGeneration).NotTo(BeNil()) - Expect(*current.Status.ObservedGeneration).To(Equal(generation)) Expect(current.Status.Databases).To(HaveLen(1)) Expect(current.Status.Databases[0].Name).To(Equal(expectedDatabase.Name)) Expect(current.Status.Databases[0].Ready).To(Equal(expectedDatabase.Ready)) Expect(current.Status.Databases[0].AdminUserSecretRef).NotTo(BeNil()) Expect(current.Status.Databases[0].RWUserSecretRef).NotTo(BeNil()) Expect(current.Status.Databases[0].ConfigMapRef).NotTo(BeNil()) + Expect(current.Status.ObservedGeneration).NotTo(BeNil()) + Expect(*current.Status.ObservedGeneration).To(Equal(generation)) +} + +func reconcilePostgresDatabaseToReady(ctx context.Context, scenario readyClusterScenario, poolerEnabled bool) *enterprisev4.PostgresDatabase { + seedReadyClusterScenario(ctx, scenario, poolerEnabled) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + current := expectFinalizerAdded(ctx, scenario.requestName) + seedExistingDatabaseStatus(ctx, current, scenario.dbName) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + expectProvisionedArtifacts(ctx, scenario, current) + expectManagedRolesPatched(ctx, scenario) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + cnpgDatabase := expectCNPGDatabaseCreated(ctx, scenario, current) + markCNPGDatabaseApplied(ctx, cnpgDatabase) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + current = fetchPostgresDatabase(ctx, scenario.requestName) + expectReadyStatus(current, current.Generation, enterprisev4.DatabaseInfo{Name: scenario.dbName, Ready: true}) + return current } var _ = Describe("PostgresDatabase Controller", func() { @@ -386,8 +415,6 @@ var _ = Describe("PostgresDatabase Controller", func() { current := fetchPostgresDatabase(ctx, requestName) expectStatusPhase(current, "Pending") expectStatusCondition(current, "ClusterReady", metav1.ConditionFalse, "ClusterNotFound") - clusterReady := meta.FindStatusCondition(current.Status.Conditions, "ClusterReady") - Expect(clusterReady.ObservedGeneration).To(Equal(current.Generation)) }) }) }) @@ -446,6 +473,164 @@ var _ = Describe("PostgresDatabase Controller", func() { }) }) + When("owned resource drift occurs after the PostgresDatabase is ready", func() { + It("repairs configmap content drift", func() { + scenario := newReadyClusterScenario(namespace, "configmap-drift", "tenant-cluster", "tenant-cnpg", "appdb") + owner := reconcilePostgresDatabaseToReady(ctx, scenario, false) + + configMap := &corev1.ConfigMap{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-config", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) + configMap.Data["rw-host"] = "unexpected.example" + Expect(k8sClient.Update(ctx, configMap)).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: configMap.Name, Namespace: configMap.Namespace}, configMap)).To(Succeed()) + Expect(configMap.Data).To(HaveKeyWithValue("rw-host", "tenant-rw."+scenario.namespace+".svc.cluster.local")) + + current := fetchPostgresDatabase(ctx, scenario.requestName) + expectReadyStatus(current, current.Generation, enterprisev4.DatabaseInfo{Name: scenario.dbName, Ready: true}) + Expect(metav1.IsControlledBy(configMap, owner)).To(BeTrue()) + }) + + It("recreates a deleted configmap", func() { + scenario := newReadyClusterScenario(namespace, "configmap-delete", "tenant-cluster", "tenant-cnpg", "appdb") + reconcilePostgresDatabaseToReady(ctx, scenario, false) + + configMapName := fmt.Sprintf("%s-%s-config", scenario.resourceName, scenario.dbName) + Expect(k8sClient.Delete(ctx, &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: configMapName, Namespace: scenario.namespace}, + })).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + configMap := &corev1.ConfigMap{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: configMapName, Namespace: scenario.namespace}, configMap)).To(Succeed()) + Expect(configMap.Data).To(HaveKeyWithValue("rw-host", "tenant-rw."+scenario.namespace+".svc.cluster.local")) + }) + + It("does not recreate a deleted managed user secret", func() { + scenario := newReadyClusterScenario(namespace, "secret-delete", "tenant-cluster", "tenant-cnpg", "appdb") + reconcilePostgresDatabaseToReady(ctx, scenario, false) + + secretName := fmt.Sprintf("%s-%s-admin", scenario.resourceName, scenario.dbName) + Expect(k8sClient.Delete(ctx, &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: secretName, Namespace: scenario.namespace}, + })).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + + current := fetchPostgresDatabase(ctx, scenario.requestName) + expectStatusPhase(current, "Provisioning") + expectStatusCondition(current, "SecretsReady", metav1.ConditionFalse, "SecretsDriftDetected") + + missing := &corev1.Secret{} + err = k8sClient.Get(ctx, types.NamespacedName{Name: secretName, Namespace: scenario.namespace}, missing) + Expect(apierrors.IsNotFound(err)).To(BeTrue()) + }) + + It("re-attaches ownership when a managed user secret loses its owner reference", func() { + scenario := newReadyClusterScenario(namespace, "secret-adopt", "tenant-cluster", "tenant-cnpg", "appdb") + owner := reconcilePostgresDatabaseToReady(ctx, scenario, false) + + secret := &corev1.Secret{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-admin", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, secret)).To(Succeed()) + secret.OwnerReferences = nil + Expect(k8sClient.Update(ctx, secret)).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: secret.Name, Namespace: secret.Namespace}, secret)).To(Succeed()) + Expect(metav1.IsControlledBy(secret, owner)).To(BeTrue()) + + current := fetchPostgresDatabase(ctx, scenario.requestName) + expectReadyStatus(current, current.Generation, enterprisev4.DatabaseInfo{Name: scenario.dbName, Ready: true}) + }) + + It("creates secrets and configmaps for a newly added database while preserving existing ones", func() { + scenario := newReadyClusterScenario(namespace, "new-database", "tenant-cluster", "tenant-cnpg", "appdb") + current := reconcilePostgresDatabaseToReady(ctx, scenario, false) + + current.Spec.Databases = append(current.Spec.Databases, enterprisev4.DatabaseDefinition{Name: "analytics"}) + Expect(k8sClient.Update(ctx, current)).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + + for _, secretName := range []string{ + fmt.Sprintf("%s-analytics-admin", scenario.resourceName), + fmt.Sprintf("%s-analytics-rw", scenario.resourceName), + } { + secret := &corev1.Secret{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: secretName, Namespace: scenario.namespace}, secret)).To(Succeed()) + } + + configMap := &corev1.ConfigMap{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-analytics-config", scenario.resourceName), Namespace: scenario.namespace}, configMap)).To(Succeed()) + Expect(configMap.Data).To(HaveKeyWithValue("dbname", "analytics")) + + existingSecret := &corev1.Secret{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-admin", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, existingSecret)).To(Succeed()) + }) + }) + + When("postgresdatabase secondary-resource predicates run", func() { + It("treats cnpg database applied-state, create, and delete changes as drift triggers", func() { + pred := postgresDatabaseCNPGDatabasePredicator() + + oldApplied := true + newApplied := false + Expect(pred.Create(event.CreateEvent{})).To(BeTrue()) + Expect(pred.Update(event.UpdateEvent{ + ObjectOld: &cnpgv1.Database{Status: cnpgv1.DatabaseStatus{Applied: &oldApplied}}, + ObjectNew: &cnpgv1.Database{Status: cnpgv1.DatabaseStatus{Applied: &newApplied}}, + })).To(BeTrue()) + Expect(pred.Delete(event.DeleteEvent{})).To(BeTrue()) + }) + + It("ignores cnpg database updates that do not change readiness or ownership", func() { + pred := postgresDatabaseCNPGDatabasePredicator() + + applied := true + Expect(pred.Update(event.UpdateEvent{ + ObjectOld: &cnpgv1.Database{ + ObjectMeta: metav1.ObjectMeta{Name: "db", Namespace: "test"}, + Status: cnpgv1.DatabaseStatus{Applied: &applied}, + }, + ObjectNew: &cnpgv1.Database{ + ObjectMeta: metav1.ObjectMeta{Name: "db", Namespace: "test"}, + Status: cnpgv1.DatabaseStatus{Applied: &applied}, + }, + })).To(BeFalse()) + }) + + It("treats secret create, update, and delete events as drift triggers", func() { + pred := predicate.ResourceVersionChangedPredicate{} + + Expect(pred.Create(event.CreateEvent{})).To(BeTrue()) + Expect(pred.Update(event.UpdateEvent{ + ObjectOld: &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "secret", Namespace: "test", ResourceVersion: "1"}}, + ObjectNew: &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "secret", Namespace: "test", ResourceVersion: "2"}}, + })).To(BeTrue()) + Expect(pred.Delete(event.DeleteEvent{})).To(BeTrue()) + }) + + It("treats configmap create, update, and delete events as drift triggers", func() { + pred := predicate.ResourceVersionChangedPredicate{} + + Expect(pred.Create(event.CreateEvent{})).To(BeTrue()) + Expect(pred.Update(event.UpdateEvent{ + ObjectOld: &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "config", Namespace: "test", ResourceVersion: "1"}}, + ObjectNew: &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "config", Namespace: "test", ResourceVersion: "2"}}, + })).To(BeTrue()) + Expect(pred.Delete(event.DeleteEvent{})).To(BeTrue()) + }) + }) + When("role ownership conflicts exist", func() { It("marks the resource failed and stops provisioning dependent resources", func() { resourceName := "conflict-cluster" diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 3334011c6..be3d73548 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -451,11 +451,11 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. oldPhase = *postgresCluster.Status.Phase } if err := syncStatus(ctx, c, postgresCluster, cnpgCluster); err != nil { - logger.Error(err, "Failed to sync status") if apierrors.IsConflict(err) { logger.Info("Conflict during status update, will requeue") return ctrl.Result{Requeue: true}, nil } + logger.Error(err, "Failed to sync status") return ctrl.Result{}, fmt.Errorf("failed to sync status: %w", err) } var newPhase string diff --git a/pkg/postgresql/database/core/database.go b/pkg/postgresql/database/core/database.go index f84a35fd9..cf89a063f 100644 --- a/pkg/postgresql/database/core/database.go +++ b/pkg/postgresql/database/core/database.go @@ -29,6 +29,22 @@ import ( // Injected by the controller so the core never imports the pgx adapter directly. type NewDBRepoFunc func(ctx context.Context, host, dbName, password string) (DBRepo, error) +type secretReconcileError struct { + message string + reason conditionReasons +} + +type secretMissingPolicy int + +const ( + createSecretIfMissing secretMissingPolicy = iota + reportSecretDriftIfMissing +) + +func (e *secretReconcileError) Error() string { + return e.message +} + // PostgresDatabaseService is the application service entry point called by the primary adapter (reconciler). // newDBRepo is injected to keep the core free of pgx imports. func PostgresDatabaseService( @@ -41,14 +57,26 @@ func PostgresDatabaseService( logger := log.FromContext(ctx).WithValues("postgresDatabase", postgresDB.Name) ctx = log.IntoContext(ctx, logger) logger.Info("Reconciling PostgresDatabase") + wasReady := postgresDB.Status.Phase != nil && *postgresDB.Status.Phase == string(readyDBPhase) + previouslyProvisionedDatabases := existingDatabaseStatus(postgresDB) updateStatus := func(conditionType conditionTypes, conditionStatus metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileDBPhases) error { return persistStatus(ctx, c, postgresDB, conditionType, conditionStatus, reason, message, phase) } + requeueOnConflict := func(err error, action string) (ctrl.Result, error, bool) { + if !errors.IsConflict(err) { + return ctrl.Result{}, err, false + } + logger.Info("Conflict during PostgresDatabase reconciliation, will requeue", "action", action) + return ctrl.Result{Requeue: true}, nil, true + } // Finalizer: cleanup on deletion, register on creation. if postgresDB.GetDeletionTimestamp() != nil { if err := handleDeletion(ctx, rc, postgresDB); err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "handling deletion"); ok { + return result, conflictErr + } logger.Error(err, "Failed to clean up PostgresDatabase") rc.emitWarning(postgresDB, EventCleanupFailed, fmt.Sprintf("Cleanup failed: %v", err)) return ctrl.Result{}, err @@ -59,9 +87,8 @@ func PostgresDatabaseService( if !controllerutil.ContainsFinalizer(postgresDB, postgresDatabaseFinalizerName) { controllerutil.AddFinalizer(postgresDB, postgresDatabaseFinalizerName) if err := c.Update(ctx, postgresDB); err != nil { - if errors.IsConflict(err) { - logger.Info("Conflict while adding finalizer, will requeue") - return ctrl.Result{Requeue: true}, nil + if result, conflictErr, ok := requeueOnConflict(err, "adding finalizer"); ok { + return result, conflictErr } logger.Error(err, "Failed to add finalizer to PostgresDatabase") return ctrl.Result{}, fmt.Errorf("failed to add finalizer: %w", err) @@ -70,24 +97,24 @@ func PostgresDatabaseService( return ctrl.Result{}, nil } - // ObservedGeneration equality means all phases completed on the current spec — nothing to do. - if postgresDB.Status.ObservedGeneration != nil && *postgresDB.Status.ObservedGeneration == postgresDB.Generation { - logger.Info("Spec unchanged and all phases complete, skipping") - return ctrl.Result{}, nil - } - // Phase: ClusterValidation cluster, err := fetchCluster(ctx, c, postgresDB) if err != nil { if errors.IsNotFound(err) { rc.emitWarning(postgresDB, EventClusterNotFound, fmt.Sprintf("PostgresCluster %s not found", postgresDB.Spec.ClusterRef.Name)) if err := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterNotFound, "Cluster CR not found", pendingDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "persisting cluster not found status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: clusterNotFoundRetryDelay}, nil } if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterInfoFetchFailed, "Can't reach Cluster CR due to transient errors", pendingDBPhase); statusErr != nil { + if result, conflictErr, ok := requeueOnConflict(statusErr, "persisting cluster fetch failure status"); ok { + return result, conflictErr + } logger.Error(statusErr, "Failed to update status") } return ctrl.Result{}, err @@ -99,6 +126,9 @@ func PostgresDatabaseService( case ClusterNotReady, ClusterNoProvisionerRef: rc.emitWarning(postgresDB, EventClusterNotReady, "Referenced PostgresCluster is not ready yet") if err := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterProvisioning, "Cluster is not in ready state yet", pendingDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "persisting cluster provisioning status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: retryDelay}, nil @@ -106,6 +136,9 @@ func PostgresDatabaseService( case ClusterReady: rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, clusterReady, EventClusterValidated, "Referenced PostgresCluster is ready") if err := updateStatus(clusterReady, metav1.ConditionTrue, reasonClusterAvailable, "Cluster is operational", provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "persisting cluster ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } } @@ -121,6 +154,9 @@ func PostgresDatabaseService( rc.emitWarning(postgresDB, EventRoleConflict, conflictMsg) errs := []error{conflictErr} if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonRoleConflict, conflictMsg, failedDBPhase); statusErr != nil { + if result, conflictErr, ok := requeueOnConflict(statusErr, "persisting role conflict status"); ok { + return result, conflictErr + } logger.Error(statusErr, "Failed to update status") errs = append(errs, fmt.Errorf("failed to update status: %w", statusErr)) } @@ -134,16 +170,37 @@ func PostgresDatabaseService( Name: cluster.Status.ProvisionerRef.Name, Namespace: cluster.Status.ProvisionerRef.Namespace, }, cnpgCluster); err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "fetching CNPG cluster"); ok { + return result, conflictErr + } logger.Error(err, "Failed to fetch CNPG Cluster") return ctrl.Result{}, err } // Phase: CredentialProvisioning — secrets must exist before roles are patched. // CNPG rejects a PasswordSecretRef pointing at a missing secret. - if err := reconcileUserSecrets(ctx, c, rc.Scheme, postgresDB); err != nil { + if err := reconcileUserSecrets(ctx, c, rc.Scheme, postgresDB, previouslyProvisionedDatabases); err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "reconciling user secrets"); ok { + return result, conflictErr + } + var driftErr *secretReconcileError + if stderrors.As(err, &driftErr) { + rc.emitWarning(postgresDB, EventRolesSecretsDriftDetected, driftErr.message) + if statusErr := updateStatus(secretsReady, metav1.ConditionFalse, driftErr.reason, + driftErr.message, provisioningDBPhase); statusErr != nil { + if result, conflictErr, ok := requeueOnConflict(statusErr, "persisting secret drift status"); ok { + return result, conflictErr + } + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{RequeueAfter: retryDelay}, nil + } rc.emitWarning(postgresDB, EventUserSecretsFailed, fmt.Sprintf("Failed to reconcile user secrets: %v", err)) if statusErr := updateStatus(secretsReady, metav1.ConditionFalse, reasonSecretsCreationFailed, fmt.Sprintf("Failed to reconcile user secrets: %v", err), provisioningDBPhase); statusErr != nil { + if result, conflictErr, ok := requeueOnConflict(statusErr, "persisting secret failure status"); ok { + return result, conflictErr + } logger.Error(statusErr, "Failed to update status") } return ctrl.Result{}, err @@ -151,6 +208,9 @@ func PostgresDatabaseService( rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, secretsReady, EventSecretsReady, fmt.Sprintf("All secrets provisioned for %d databases", len(postgresDB.Spec.Databases))) if err := updateStatus(secretsReady, metav1.ConditionTrue, reasonSecretsCreated, fmt.Sprintf("All secrets provisioned for %d databases", len(postgresDB.Spec.Databases)), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "persisting secrets ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } @@ -158,9 +218,15 @@ func PostgresDatabaseService( // as databases are ready, so they are created alongside secrets. endpoints := resolveClusterEndpoints(cluster, cnpgCluster, postgresDB.Namespace) if err := reconcileRoleConfigMaps(ctx, c, rc.Scheme, postgresDB, endpoints); err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "reconciling configmaps"); ok { + return result, conflictErr + } rc.emitWarning(postgresDB, EventAccessConfigFailed, fmt.Sprintf("Failed to reconcile ConfigMaps: %v", err)) if statusErr := updateStatus(configMapsReady, metav1.ConditionFalse, reasonConfigMapsCreationFailed, fmt.Sprintf("Failed to reconcile ConfigMaps: %v", err), provisioningDBPhase); statusErr != nil { + if result, conflictErr, ok := requeueOnConflict(statusErr, "persisting configmaps failure status"); ok { + return result, conflictErr + } logger.Error(statusErr, "Failed to update status") } return ctrl.Result{}, err @@ -168,6 +234,9 @@ func PostgresDatabaseService( rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, configMapsReady, EventConfigMapsReady, fmt.Sprintf("All ConfigMaps provisioned for %d databases", len(postgresDB.Spec.Databases))) if err := updateStatus(configMapsReady, metav1.ConditionTrue, reasonConfigMapsCreated, fmt.Sprintf("All ConfigMaps provisioned for %d databases", len(postgresDB.Spec.Databases)), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "persisting configmaps ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } @@ -184,6 +253,9 @@ func PostgresDatabaseService( if len(missing) > 0 { logger.Info("CNPG Cluster patch started, missing roles detected", "missing", missing) if err := patchManagedRoles(ctx, c, postgresDB, cluster); err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "patching managed roles"); ok { + return result, conflictErr + } logger.Error(err, "Failed to patch users in CNPG Cluster") rc.emitWarning(postgresDB, EventManagedRolesPatchFailed, fmt.Sprintf("Failed to patch managed roles: %v", err)) return ctrl.Result{}, err @@ -191,6 +263,9 @@ func PostgresDatabaseService( rc.emitNormal(postgresDB, EventRoleReconciliationStarted, fmt.Sprintf("Patched managed roles, waiting for %d roles to reconcile", len(desiredUsers))) if err := updateStatus(rolesReady, metav1.ConditionFalse, reasonWaitingForCNPG, fmt.Sprintf("Waiting for %d roles to be reconciled", len(desiredUsers)), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "persisting roles waiting status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: retryDelay}, nil @@ -201,6 +276,9 @@ func PostgresDatabaseService( rc.emitWarning(postgresDB, EventRoleFailed, fmt.Sprintf("Role reconciliation failed: %v", err)) if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonUsersCreationFailed, fmt.Sprintf("Role creation failed: %v", err), failedDBPhase); statusErr != nil { + if result, conflictErr, ok := requeueOnConflict(statusErr, "persisting role failure status"); ok { + return result, conflictErr + } logger.Error(statusErr, "Failed to update status") } return ctrl.Result{}, err @@ -208,6 +286,9 @@ func PostgresDatabaseService( if len(notReadyRoles) > 0 { if err := updateStatus(rolesReady, metav1.ConditionFalse, reasonWaitingForCNPG, fmt.Sprintf("Waiting for roles to be reconciled: %v", notReadyRoles), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "persisting roles pending status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: retryDelay}, nil @@ -215,12 +296,18 @@ func PostgresDatabaseService( rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, rolesReady, EventRolesReady, fmt.Sprintf("All %d roles reconciled", len(desiredUsers))) if err := updateStatus(rolesReady, metav1.ConditionTrue, reasonUsersAvailable, fmt.Sprintf("All %d users in PostgreSQL", len(desiredUsers)), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "persisting roles ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } // Phase: DatabaseProvisioning adopted, err := reconcileCNPGDatabases(ctx, c, rc.Scheme, postgresDB, cluster) if err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "reconciling CNPG databases"); ok { + return result, conflictErr + } logger.Error(err, "Failed to reconcile CNPG Databases") rc.emitWarning(postgresDB, EventDatabasesReconcileFailed, fmt.Sprintf("Failed to reconcile databases: %v", err)) return ctrl.Result{}, err @@ -238,6 +325,9 @@ func PostgresDatabaseService( rc.emitOnceBeforeWait(postgresDB, postgresDB.Status.Conditions, databasesReady, EventDatabaseReconciliationStarted, fmt.Sprintf("Reconciling %d databases, waiting for readiness", len(postgresDB.Spec.Databases))) if err := updateStatus(databasesReady, metav1.ConditionFalse, reasonWaitingForCNPG, fmt.Sprintf("Waiting for databases to be ready: %v", notReadyDBs), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "persisting databases pending status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: retryDelay}, nil @@ -245,6 +335,9 @@ func PostgresDatabaseService( rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, databasesReady, EventDatabasesReady, fmt.Sprintf("All %d databases ready", len(postgresDB.Spec.Databases))) if err := updateStatus(databasesReady, metav1.ConditionTrue, reasonDatabasesAvailable, fmt.Sprintf("All %d databases ready", len(postgresDB.Spec.Databases)), readyDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "persisting databases ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } @@ -281,6 +374,9 @@ func PostgresDatabaseService( rc.emitWarning(postgresDB, EventPrivilegesGrantFailed, fmt.Sprintf("Failed to grant RW role privileges: %v", err)) if statusErr := updateStatus(privilegesReady, metav1.ConditionFalse, reasonPrivilegesGrantFailed, fmt.Sprintf("Failed to grant RW role privileges: %v", err), provisioningDBPhase); statusErr != nil { + if result, conflictErr, ok := requeueOnConflict(statusErr, "persisting privileges failure status"); ok { + return result, conflictErr + } logger.Error(statusErr, "Failed to update status") } return ctrl.Result{}, err @@ -288,17 +384,22 @@ func PostgresDatabaseService( rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, privilegesReady, EventPrivilegesReady, fmt.Sprintf("RW role privileges granted for all %d databases", len(postgresDB.Spec.Databases))) if err := updateStatus(privilegesReady, metav1.ConditionTrue, reasonPrivilegesGranted, fmt.Sprintf("RW role privileges granted for all %d databases", len(postgresDB.Spec.Databases)), readyDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(err, "persisting privileges ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } } - rc.emitNormal(postgresDB, EventPostgresDatabaseReady, fmt.Sprintf("PostgresDatabase %s is ready", postgresDB.Name)) + if !wasReady { + rc.emitNormal(postgresDB, EventPostgresDatabaseReady, fmt.Sprintf("PostgresDatabase %s is ready", postgresDB.Name)) + } postgresDB.Status.Databases = populateDatabaseStatus(postgresDB) postgresDB.Status.ObservedGeneration = &postgresDB.Generation if err := c.Status().Update(ctx, postgresDB); err != nil { - if errors.IsConflict(err) { - return ctrl.Result{Requeue: true}, nil + if result, conflictErr, ok := requeueOnConflict(err, "persisting final status"); ok { + return result, conflictErr } return ctrl.Result{}, fmt.Errorf("failed to persist final status: %w", err) } @@ -358,6 +459,17 @@ func getDesiredUsers(postgresDB *enterprisev4.PostgresDatabase) []string { return users } +func existingDatabaseStatus(postgresDB *enterprisev4.PostgresDatabase) map[string]struct{} { + if postgresDB.Status.Phase == nil || *postgresDB.Status.Phase != string(readyDBPhase) { + return map[string]struct{}{} + } + existing := make(map[string]struct{}, len(postgresDB.Status.Databases)) + for _, database := range postgresDB.Status.Databases { + existing[database.Name] = struct{}{} + } + return existing +} + func getUsersInClusterSpec(cluster *enterprisev4.PostgresCluster) []string { users := make([]string, 0, len(cluster.Spec.ManagedRoles)) for _, role := range cluster.Spec.ManagedRoles { @@ -484,6 +596,10 @@ func verifyDatabasesReady(ctx context.Context, c client.Client, postgresDB *ente cnpgDBName := cnpgDatabaseName(postgresDB.Name, dbSpec.Name) cnpgDB := &cnpgv1.Database{} if err := c.Get(ctx, types.NamespacedName{Name: cnpgDBName, Namespace: postgresDB.Namespace}, cnpgDB); err != nil { + if errors.IsNotFound(err) { + notReady = append(notReady, dbSpec.Name) + continue + } return nil, fmt.Errorf("getting CNPG Database %s: %w", cnpgDBName, err) } if cnpgDB.Status.Applied == nil || !*cnpgDB.Status.Applied { @@ -508,6 +624,7 @@ func applyStatus(db *enterprisev4.PostgresDatabase, conditionType conditionTypes }) p := string(phase) db.Status.Phase = &p + db.Status.ObservedGeneration = &db.Generation } func buildDeletionPlan(databases []enterprisev4.DatabaseDefinition) deletionPlan { @@ -787,31 +904,116 @@ func adoptResource(ctx context.Context, c client.Client, scheme *runtime.Scheme, return c.Update(ctx, obj) } -func reconcileUserSecrets(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase) error { +func secretMissingPolicyForDB(dbName string, existingDBs map[string]struct{}) secretMissingPolicy { + if _, exists := existingDBs[dbName]; exists { + return reportSecretDriftIfMissing + } + return createSecretIfMissing +} + +func reconcileUserSecrets(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, existingDatabases map[string]struct{}) error { for _, dbSpec := range postgresDB.Spec.Databases { - if err := ensureSecret(ctx, c, scheme, postgresDB, adminRoleName(dbSpec.Name), roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleAdmin)); err != nil { + missingPolicy := secretMissingPolicyForDB(dbSpec.Name, existingDatabases) + if err := reconcileUserRoleSecret(ctx, c, scheme, postgresDB, dbSpec.Name, secretRoleAdmin, missingPolicy); err != nil { return err } - if err := ensureSecret(ctx, c, scheme, postgresDB, rwRoleName(dbSpec.Name), roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleRW)); err != nil { + if err := reconcileUserRoleSecret(ctx, c, scheme, postgresDB, dbSpec.Name, secretRoleRW, missingPolicy); err != nil { return err } } return nil } +func reconcileUserRoleSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, dbName, role string, missingPolicy secretMissingPolicy) error { + roleName := adminRoleName(dbName) + if role == secretRoleRW { + roleName = rwRoleName(dbName) + } + secretName := roleSecretName(postgresDB.Name, dbName, role) + + if missingPolicy == reportSecretDriftIfMissing { + return ensureProvisionedSecret(ctx, c, scheme, postgresDB, roleName, secretName) + } + + return ensureSecret(ctx, c, scheme, postgresDB, roleName, secretName) +} + func ensureSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, roleName, secretName string) error { secret, err := getSecret(ctx, c, postgresDB.Namespace, secretName) if err != nil { return err } - logger := log.FromContext(ctx) - switch { - case secret == nil: + if secret == nil { + logger := log.FromContext(ctx) logger.Info("User secret creation started", "name", secretName) return createUserSecret(ctx, c, scheme, postgresDB, roleName, secretName) + } + return reconcileExistingSecret(ctx, c, scheme, postgresDB, roleName, secretName, secret) +} + +func ensureProvisionedSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, roleName, secretName string) error { + secret, err := getSecret(ctx, c, postgresDB.Namespace, secretName) + if err != nil { + return err + } + if secret == nil { + return &secretReconcileError{ + message: fmt.Sprintf("Managed Secret %s is missing for previously provisioned role %s", secretName, roleName), + reason: reasonSecretsDriftDetected, + } + } + return reconcileExistingSecret(ctx, c, scheme, postgresDB, roleName, secretName, secret) +} + +func reconcileExistingSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, roleName, secretName string, secret *corev1.Secret) error { + logger := log.FromContext(ctx) + switch { case secret.Annotations[annotationRetainedFrom] == postgresDB.Name: + if err := validateManagedSecret(secret, roleName); err != nil { + return &secretReconcileError{ + message: fmt.Sprintf("Managed Secret %s is invalid: %v", secretName, err), + reason: reasonSecretsDriftDetected, + } + } logger.Info("Orphaned secret re-adopted", "name", secretName) return adoptResource(ctx, c, scheme, postgresDB, secret) + case metav1.IsControlledBy(secret, postgresDB): + if err := validateManagedSecret(secret, roleName); err != nil { + return &secretReconcileError{ + message: fmt.Sprintf("Managed Secret %s is invalid: %v", secretName, err), + reason: reasonSecretsDriftDetected, + } + } + return nil + case metav1.GetControllerOf(secret) == nil: + if err := validateManagedSecret(secret, roleName); err != nil { + return &secretReconcileError{ + message: fmt.Sprintf("Managed Secret %s is invalid: %v", secretName, err), + reason: reasonSecretsDriftDetected, + } + } + logger.Info("Existing secret linked to PostgresDatabase", "name", secretName) + return adoptResource(ctx, c, scheme, postgresDB, secret) + default: + owner := metav1.GetControllerOf(secret) + return &secretReconcileError{ + message: fmt.Sprintf("Managed Secret %s is controlled by %s %s", secretName, owner.Kind, owner.Name), + reason: reasonSecretsDriftDetected, + } + } +} + +func validateManagedSecret(secret *corev1.Secret, roleName string) error { + username, ok := secret.Data["username"] + if !ok || len(username) == 0 { + return fmt.Errorf("missing username data") + } + if string(username) != roleName { + return fmt.Errorf("username %q does not match expected role %q", string(username), roleName) + } + password, ok := secret.Data[secretKeyPassword] + if !ok || len(password) == 0 { + return fmt.Errorf("missing password data") } return nil } @@ -888,7 +1090,7 @@ func reconcileRoleConfigMaps(ctx context.Context, c client.Client, scheme *runti logger.Info("Orphaned ConfigMap re-adopted", "name", cmName) delete(cm.Annotations, annotationRetainedFrom) } - if cm.CreationTimestamp.IsZero() || reAdopting { + if !metav1.IsControlledBy(cm, postgresDB) { return controllerutil.SetControllerReference(postgresDB, cm, scheme) } return nil diff --git a/pkg/postgresql/database/core/database_unit_test.go b/pkg/postgresql/database/core/database_unit_test.go index 8d4da6c52..5d651a119 100644 --- a/pkg/postgresql/database/core/database_unit_test.go +++ b/pkg/postgresql/database/core/database_unit_test.go @@ -3,7 +3,6 @@ package core // The following functions are intentionally not tested directly here. // Their business logic is covered by narrower helper tests where practical, // and the remaining behavior is mostly controller-runtime orchestration: -// - PostgresDatabaseService // - patchManagedRoles // - reconcileCNPGDatabases // - handleDeletion @@ -26,9 +25,12 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/tools/record" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/client/interceptor" @@ -123,6 +125,159 @@ func testClient(t *testing.T, scheme *runtime.Scheme, objs ...client.Object) cli return builder.Build() } +func postgresDatabaseConflict(name string) error { + return apierrors.NewConflict( + schema.GroupResource{ + Group: enterprisev4.GroupVersion.Group, + Resource: "postgresdatabases", + }, + name, + errors.New("resource version conflict"), + ) +} + +func TestPostgresDatabaseServiceRequeuesOnConflict(t *testing.T) { + scheme := testScheme(t) + tests := []struct { + name string + existing *enterprisev4.PostgresDatabase + build func(*enterprisev4.PostgresDatabase) client.Client + }{ + { + name: "when adding the finalizer", + existing: &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + }, + }, + build: func(existing *enterprisev4.PostgresDatabase) client.Client { + return fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&enterprisev4.PostgresDatabase{}). + WithObjects(existing). + WithInterceptorFuncs(interceptor.Funcs{ + Update: func(_ context.Context, _ client.WithWatch, obj client.Object, _ ...client.UpdateOption) error { + return postgresDatabaseConflict(obj.GetName()) + }, + }). + Build() + }, + }, + { + name: "when persisting status", + existing: &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + Finalizers: []string{postgresDatabaseFinalizerName}, + }, + Spec: enterprisev4.PostgresDatabaseSpec{ + ClusterRef: corev1.LocalObjectReference{Name: "missing-cluster"}, + }, + }, + build: func(existing *enterprisev4.PostgresDatabase) client.Client { + return fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&enterprisev4.PostgresDatabase{}). + WithObjects(existing). + WithInterceptorFuncs(interceptor.Funcs{ + SubResourceUpdate: func(_ context.Context, _ client.Client, subResourceName string, obj client.Object, _ ...client.SubResourceUpdateOption) error { + if subResourceName != "status" { + return nil + } + return postgresDatabaseConflict(obj.GetName()) + }, + }). + Build() + }, + }, + { + name: "when status update conflicts while handling another error", + existing: &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + Finalizers: []string{postgresDatabaseFinalizerName}, + }, + Spec: enterprisev4.PostgresDatabaseSpec{ + ClusterRef: corev1.LocalObjectReference{Name: "primary"}, + }, + }, + build: func(existing *enterprisev4.PostgresDatabase) client.Client { + return fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&enterprisev4.PostgresDatabase{}). + WithObjects(existing). + WithInterceptorFuncs(interceptor.Funcs{ + Get: func(ctx context.Context, client client.WithWatch, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if _, ok := obj.(*enterprisev4.PostgresCluster); ok { + return errors.New("temporary get failure") + } + return client.Get(ctx, key, obj, opts...) + }, + SubResourceUpdate: func(_ context.Context, _ client.Client, subResourceName string, obj client.Object, _ ...client.SubResourceUpdateOption) error { + if subResourceName != "status" { + return nil + } + return postgresDatabaseConflict(obj.GetName()) + }, + }). + Build() + }, + }, + } + + for _, tst := range tests { + t.Run(tst.name, func(t *testing.T) { + c := tst.build(tst.existing) + + postgresDB := &enterprisev4.PostgresDatabase{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: tst.existing.Name, Namespace: tst.existing.Namespace}, postgresDB)) + + result, err := PostgresDatabaseService( + context.Background(), + &ReconcileContext{Client: c, Scheme: scheme, Recorder: record.NewFakeRecorder(10)}, + postgresDB, + nil, + ) + + require.NoError(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, result) + }) + } +} + +func TestSecretMissingPolicyForDB(t *testing.T) { + tests := []struct { + name string + dbName string + existingDBs map[string]struct{} + want secretMissingPolicy + }{ + { + name: "creates secrets for new databases", + dbName: "payments", + existingDBs: map[string]struct{}{}, + want: createSecretIfMissing, + }, + { + name: "reports drift for previously provisioned databases", + dbName: "payments", + existingDBs: map[string]struct{}{ + "payments": {}, + }, + want: reportSecretDriftIfMissing, + }, + } + + for _, tst := range tests { + t.Run(tst.name, func(t *testing.T) { + assert.Equal(t, tst.want, secretMissingPolicyForDB(tst.dbName, tst.existingDBs)) + }) + } +} + func TestGetDesiredUsers(t *testing.T) { postgresDB := &enterprisev4.PostgresDatabase{ Spec: enterprisev4.PostgresDatabaseSpec{ @@ -306,7 +461,7 @@ func TestVerifyRolesReady(t *testing.T) { }, }, }, - wantErr: "user main_db_rw reconciliation failed: [reserved role]", + wantErr: "reconciling user main_db_rw: [reserved role]", }, { name: "returns missing roles that are not reconciled yet", @@ -664,14 +819,14 @@ func TestVerifyDatabasesReady(t *testing.T) { wantNotReady: []string{"payments", "analytics"}, }, { - name: "returns error when a database is missing", + name: "returns not ready when a database is missing", objects: []client.Object{ &cnpgv1.Database{ ObjectMeta: metav1.ObjectMeta{Name: "primary-payments", Namespace: "dbs"}, Status: cnpgv1.DatabaseStatus{Applied: boolPtr(true)}, }, }, - wantErr: "getting CNPG Database primary-analytics", + wantNotReady: []string{"analytics"}, }, } @@ -1019,7 +1174,14 @@ func TestEnsureSecret(t *testing.T) { "keep": wantKeep, }, OwnerReferences: []metav1.OwnerReference{ - {UID: wantOwnerUID, Name: postgresDB.Name}, + { + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresDatabase", + Name: postgresDB.Name, + UID: wantOwnerUID, + Controller: boolPtr(true), + BlockOwnerDeletion: boolPtr(true), + }, }, }, Data: map[string][]byte{ @@ -1041,6 +1203,77 @@ func TestEnsureSecret(t *testing.T) { require.Len(t, got.OwnerReferences, 1) assert.Equal(t, wantOwnerUID, got.OwnerReferences[0].UID) }) + + t.Run("returns drift error when a previously provisioned secret is missing", func(t *testing.T) { + roleName := "payments_admin" + secretName := "primary-payments-admin" + c := testClient(t, scheme) + + err := ensureProvisionedSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + + require.Error(t, err) + var driftErr *secretReconcileError + require.ErrorAs(t, err, &driftErr) + assert.Equal(t, reasonSecretsDriftDetected, driftErr.reason) + assert.ErrorContains(t, err, secretName) + }) + + t.Run("re-attaches owner reference when ownership was manually stripped", func(t *testing.T) { + roleName := "payments_admin" + secretName := "primary-payments-admin" + existing := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: postgresDB.Namespace, + Labels: map[string]string{ + labelManagedBy: "splunk-operator", + labelCNPGReload: "true", + }, + Annotations: map[string]string{"keep": "true"}, + }, + Data: map[string][]byte{ + "username": []byte(roleName), + secretKeyPassword: []byte("existing-password"), + }, + } + c := testClient(t, scheme, existing) + + err := ensureProvisionedSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + + require.NoError(t, err) + + got := &corev1.Secret{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: secretName, Namespace: postgresDB.Namespace}, got)) + assert.Equal(t, "true", got.Annotations["keep"]) + require.Len(t, got.OwnerReferences, 1) + assert.Equal(t, postgresDB.UID, got.OwnerReferences[0].UID) + }) + + t.Run("returns drift error when an existing secret has invalid data", func(t *testing.T) { + roleName := "payments_admin" + secretName := "primary-payments-admin" + existing := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: postgresDB.Namespace, + OwnerReferences: []metav1.OwnerReference{ + {UID: postgresDB.UID, Name: postgresDB.Name}, + }, + }, + Data: map[string][]byte{ + "username": []byte("wrong_user"), + }, + } + c := testClient(t, scheme, existing) + + err := ensureProvisionedSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + + require.Error(t, err) + var driftErr *secretReconcileError + require.ErrorAs(t, err, &driftErr) + assert.Equal(t, reasonSecretsDriftDetected, driftErr.reason) + assert.ErrorContains(t, err, "invalid") + }) } // Uses a fake client because the helper reconciles multiple Secret objects through the Kubernetes API. @@ -1076,7 +1309,7 @@ func TestReconcileUserSecrets(t *testing.T) { {name: "primary-analytics-rw", username: "analytics_rw"}, } - err := reconcileUserSecrets(context.Background(), c, scheme, postgresDB) + err := reconcileUserSecrets(context.Background(), c, scheme, postgresDB, existingDatabaseStatus(postgresDB)) require.NoError(t, err) for _, want := range wantSecrets { @@ -1092,13 +1325,13 @@ func TestReconcileUserSecrets(t *testing.T) { t.Run("is idempotent when secrets already exist", func(t *testing.T) { c := testClient(t, scheme) - require.NoError(t, reconcileUserSecrets(context.Background(), c, scheme, postgresDB)) + require.NoError(t, reconcileUserSecrets(context.Background(), c, scheme, postgresDB, existingDatabaseStatus(postgresDB))) before := &corev1.Secret{} require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: "primary-payments-admin", Namespace: postgresDB.Namespace}, before)) beforePassword := append([]byte(nil), before.Data[secretKeyPassword]...) - err := reconcileUserSecrets(context.Background(), c, scheme, postgresDB) + err := reconcileUserSecrets(context.Background(), c, scheme, postgresDB, existingDatabaseStatus(postgresDB)) require.NoError(t, err) @@ -1108,6 +1341,19 @@ func TestReconcileUserSecrets(t *testing.T) { require.Len(t, after.OwnerReferences, 1) assert.Equal(t, postgresDB.UID, after.OwnerReferences[0].UID) }) + + t.Run("does not recreate missing secrets for previously provisioned databases", func(t *testing.T) { + postgresDB.Status.Phase = strPtr(string(readyDBPhase)) + postgresDB.Status.Databases = []enterprisev4.DatabaseInfo{{Name: "payments"}} + c := testClient(t, scheme) + + err := reconcileUserSecrets(context.Background(), c, scheme, postgresDB, existingDatabaseStatus(postgresDB)) + + require.Error(t, err) + var driftErr *secretReconcileError + require.ErrorAs(t, err, &driftErr) + assert.Equal(t, reasonSecretsDriftDetected, driftErr.reason) + }) } // Uses a fake client because the helper reconciles ConfigMaps through CreateOrUpdate and persists re-adoption metadata. @@ -1226,6 +1472,47 @@ func TestReconcileRoleConfigMaps(t *testing.T) { BlockOwnerDeletion: boolPtr(true), }) }) + + t.Run("re-attaches owner reference when configmap ownership was manually stripped", func(t *testing.T) { + postgresDB := &enterprisev4.PostgresDatabase{ + TypeMeta: metav1.TypeMeta{ + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresDatabase", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + UID: types.UID("postgresdb-uid"), + }, + Spec: enterprisev4.PostgresDatabaseSpec{ + Databases: []enterprisev4.DatabaseDefinition{ + {Name: "payments"}, + }, + }, + } + cmName := "primary-payments-config" + existing := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: cmName, + Namespace: postgresDB.Namespace, + Labels: map[string]string{labelManagedBy: "splunk-operator"}, + Annotations: map[string]string{"keep": "true"}, + }, + Data: map[string]string{"dbname": "payments"}, + } + c := testClient(t, scheme, existing) + + err := reconcileRoleConfigMaps(context.Background(), c, scheme, postgresDB, endpoints) + + require.NoError(t, err) + + got := &corev1.ConfigMap{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: cmName, Namespace: postgresDB.Namespace}, got)) + assert.Equal(t, "true", got.Annotations["keep"]) + require.Len(t, got.OwnerReferences, 1) + assert.Equal(t, postgresDB.UID, got.OwnerReferences[0].UID) + assert.Equal(t, buildDatabaseConfigMapBody("payments", endpoints), got.Data) + }) } func TestBuildDeletionPlan(t *testing.T) { diff --git a/pkg/postgresql/database/core/events.go b/pkg/postgresql/database/core/events.go index 987b8bbfb..9bb4cbe17 100644 --- a/pkg/postgresql/database/core/events.go +++ b/pkg/postgresql/database/core/events.go @@ -23,6 +23,7 @@ const ( EventClusterNotReady = "ClusterNotReady" EventRoleConflict = "RoleConflict" EventUserSecretsFailed = "UserSecretsFailed" + EventRolesSecretsDriftDetected = "RolesSecretsDriftDetected" EventAccessConfigFailed = "AccessConfigFailed" EventManagedRolesPatchFailed = "ManagedRolesPatchFailed" EventRoleFailed = "RoleFailed" diff --git a/pkg/postgresql/database/core/types.go b/pkg/postgresql/database/core/types.go index bf07fd19f..6f336c481 100644 --- a/pkg/postgresql/database/core/types.go +++ b/pkg/postgresql/database/core/types.go @@ -71,6 +71,7 @@ const ( reasonDatabasesAvailable conditionReasons = "DatabasesAvailable" reasonSecretsCreated conditionReasons = "SecretsCreated" reasonSecretsCreationFailed conditionReasons = "SecretsCreationFailed" + reasonSecretsDriftDetected conditionReasons = "SecretsDriftDetected" reasonWaitingForCNPG conditionReasons = "WaitingForCNPG" reasonUsersCreationFailed conditionReasons = "UsersCreationFailed" reasonUsersAvailable conditionReasons = "UsersAvailable"