Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"path/filepath"
"time"

crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
"sigs.k8s.io/controller-runtime/pkg/metrics/filters"

intController "github.com/splunk/splunk-operator/internal/controller"
Expand Down Expand Up @@ -55,6 +56,7 @@ import (
"github.com/splunk/splunk-operator/internal/controller"

cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
pgmetrics "github.com/splunk/splunk-operator/pkg/postgresql/metrics"
//+kubebuilder:scaffold:imports
//extapi "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
)
Expand Down Expand Up @@ -282,18 +284,29 @@ func main() {
setupLog.Error(err, "unable to create controller", "controller", "Telemetry")
os.Exit(1)
}
pgRecorder := pgmetrics.NewPrometheusRecorder()
if err := pgmetrics.Register(crmetrics.Registry); err != nil {
setupLog.Error(err, "unable to register PostgreSQL metrics")
os.Exit(1)
}
pgFleetCollector := pgmetrics.NewFleetCollector()

if err := (&controller.PostgresDatabaseReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("postgresdatabase-controller"),
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("postgresdatabase-controller"),
Metrics: pgRecorder,
FleetCollector: pgFleetCollector,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "PostgresDatabase")
os.Exit(1)
}
if err := (&controller.PostgresClusterReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("postgrescluster-controller"),
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("postgrescluster-controller"),
Metrics: pgRecorder,
FleetCollector: pgFleetCollector,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "PostgresCluster")
os.Exit(1)
Expand Down
13 changes: 9 additions & 4 deletions internal/controller/postgrescluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
enterprisev4 "github.com/splunk/splunk-operator/api/v4"
clustercore "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core"
pgmetrics "github.com/splunk/splunk-operator/pkg/postgresql/metrics"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/equality"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -42,8 +43,10 @@ const (
// PostgresClusterReconciler reconciles PostgresCluster resources.
//
// Reconcile copies Client, Scheme, Recorder and Metrics into a
// clustercore.ReconcileContext, delegates to clustercore.PostgresClusterService,
// and then calls FleetCollector.CollectClusterMetrics. cmd/main.go builds this
// reconciler with the same metrics recorder and fleet collector instances it
// gives to PostgresDatabaseReconciler.
//
// NOTE(review): Scheme and Recorder are listed twice below; this looks like the
// removed and added lines of a diff rendered together rather than genuine
// duplicate fields — confirm against the real file.
type PostgresClusterReconciler struct {
// Embedded Kubernetes client; Reconcile forwards it as r.Client.
client.Client
// Scheme is forwarded to the reconcile context.
Scheme *runtime.Scheme
// Recorder is the event recorder forwarded to the reconcile context.
Recorder record.EventRecorder
Scheme *runtime.Scheme
Recorder record.EventRecorder
// Metrics is the PostgreSQL metrics recorder. Reconcile passes it both to the
// reconcile context and to FleetCollector.CollectClusterMetrics.
Metrics pgmetrics.Recorder
// FleetCollector is invoked once per Reconcile call, after the service returns.
// NOTE(review): Reconcile calls through this pointer without a nil check —
// confirm every constructor sets it or that the method tolerates a nil receiver.
FleetCollector *pgmetrics.FleetCollector
}

// +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters,verbs=get;list;watch;create;update;patch;delete
Expand All @@ -57,8 +60,10 @@ type PostgresClusterReconciler struct {
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch

// Reconcile handles one reconcile request for a PostgresCluster.
//
// It builds a clustercore.ReconcileContext from the reconciler's Client,
// Scheme, Recorder and Metrics, delegates the work to
// clustercore.PostgresClusterService, calls
// FleetCollector.CollectClusterMetrics, and finally returns the service's
// result and error unchanged.
func (r *PostgresClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
// NOTE(review): the next two lines appear to be the pre-change version of this
// body (no Metrics, direct return) shown alongside the post-change lines that
// follow — confirm which set is live in the real file.
rc := &clustercore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder}
return clustercore.PostgresClusterService(ctx, rc, req)
rc := &clustercore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder, Metrics: r.Metrics}
result, err := clustercore.PostgresClusterService(ctx, rc, req)
// Collection runs after every service call, including when err is non-nil;
// nothing it returns (if anything) is inspected here.
// NOTE(review): r.FleetCollector and r.Metrics are used without nil checks —
// verify callers (including tests) always populate them.
r.FleetCollector.CollectClusterMetrics(ctx, r.Client, r.Metrics)
return result, err
}

// SetupWithManager registers the controller and owned resource watches.
Expand Down
14 changes: 10 additions & 4 deletions internal/controller/postgresdatabase_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
enterprisev4 "github.com/splunk/splunk-operator/api/v4"
dbadapter "github.com/splunk/splunk-operator/pkg/postgresql/database/adapter"
dbcore "github.com/splunk/splunk-operator/pkg/postgresql/database/core"
pgmetrics "github.com/splunk/splunk-operator/pkg/postgresql/metrics"

corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
Expand All @@ -42,8 +43,10 @@ import (
// PostgresDatabaseReconciler reconciles a PostgresDatabase object.
//
// Its Reconcile method copies Client, Scheme, Recorder and Metrics into a
// dbcore.ReconcileContext for dbcore.PostgresDatabaseService and afterwards
// calls FleetCollector.CollectDatabaseMetrics. cmd/main.go builds this
// reconciler with the same metrics recorder and fleet collector instances it
// gives to PostgresClusterReconciler.
//
// NOTE(review): Scheme and Recorder are listed twice below; this looks like the
// removed and added lines of a diff rendered together rather than genuine
// duplicate fields — confirm against the real file.
type PostgresDatabaseReconciler struct {
// Embedded Kubernetes client; Reconcile forwards it as r.Client.
client.Client
// Scheme is forwarded to the reconcile context.
Scheme *runtime.Scheme
// Recorder is the event recorder forwarded to the reconcile context.
Recorder record.EventRecorder
Scheme *runtime.Scheme
Recorder record.EventRecorder
// Metrics is the PostgreSQL metrics recorder. Reconcile passes it both to the
// reconcile context and to FleetCollector.CollectDatabaseMetrics.
Metrics pgmetrics.Recorder
// FleetCollector is invoked after the database service returns.
// NOTE(review): Reconcile calls through this pointer without a nil check —
// confirm every constructor sets it or that the method tolerates a nil receiver.
FleetCollector *pgmetrics.FleetCollector
}

const (
Expand Down Expand Up @@ -71,8 +74,11 @@ func (r *PostgresDatabaseReconciler) Reconcile(ctx context.Context, req ctrl.Req
}
return ctrl.Result{}, err
}
rc := &dbcore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder}
return dbcore.PostgresDatabaseService(ctx, rc, postgresDB, dbadapter.NewDBRepository)
rc := &dbcore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder, Metrics: r.Metrics}
result, err := dbcore.PostgresDatabaseService(ctx, rc, postgresDB, dbadapter.NewDBRepository)
r.FleetCollector.CollectDatabaseMetrics(ctx, r.Client, r.Metrics)

return result, err
}

// SetupWithManager sets up the controller with the Manager.
Expand Down
13 changes: 13 additions & 0 deletions pkg/postgresql/cluster/core/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
password "github.com/sethvargo/go-password/password"
enterprisev4 "github.com/splunk/splunk-operator/api/v4"
pgmetrics "github.com/splunk/splunk-operator/pkg/postgresql/metrics"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/equality"
apierrors "k8s.io/apimachinery/pkg/api/errors"
Expand Down Expand Up @@ -78,6 +79,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl.
}
logger.Error(err, "Failed to handle finalizer")
rc.emitWarning(postgresCluster, EventCleanupFailed, fmt.Sprintf("Cleanup failed: %v", err))
rc.Metrics.IncFinalizerOp(pgmetrics.ControllerCluster, pgmetrics.ResultError)
errs := []error{err}
if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterDeleteFailed,
fmt.Sprintf("Failed to delete resources during cleanup: %v", err), failedClusterPhase); statusErr != nil {
Expand Down Expand Up @@ -111,6 +113,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl.
if err := c.Get(ctx, client.ObjectKey{Name: postgresCluster.Spec.Class}, clusterClass); err != nil {
logger.Error(err, "Failed to fetch PostgresClusterClass", "className", postgresCluster.Spec.Class)
rc.emitWarning(postgresCluster, EventClusterClassNotFound, fmt.Sprintf("ClusterClass %s not found", postgresCluster.Spec.Class))
rc.Metrics.IncValidationFailure(pgmetrics.ControllerCluster, pgmetrics.ReasonClassNotFound)
if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterClassNotFound,
fmt.Sprintf("ClusterClass %s not found: %v", postgresCluster.Spec.Class, err), failedClusterPhase); statusErr != nil {
logger.Error(statusErr, "Failed to update status")
Expand All @@ -123,6 +126,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl.
if err != nil {
logger.Error(err, "Failed to merge PostgresCluster configuration")
rc.emitWarning(postgresCluster, EventConfigMergeFailed, fmt.Sprintf("Failed to merge configuration: %v", err))
rc.Metrics.IncValidationFailure(pgmetrics.ControllerCluster, pgmetrics.ReasonInvalidConfig)
if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonInvalidConfiguration,
fmt.Sprintf("Failed to merge configuration: %v", err), failedClusterPhase); statusErr != nil {
logger.Error(statusErr, "Failed to update status")
Expand Down Expand Up @@ -169,6 +173,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl.
return ctrl.Result{}, err
}
rc.emitNormal(postgresCluster, EventSecretReady, fmt.Sprintf("Superuser secret %s created", postgresSecretName))
rc.Metrics.IncOwnedResourceOp(pgmetrics.ControllerCluster, pgmetrics.ResourceSecret, pgmetrics.OpCreate, pgmetrics.ResultSuccess)
logger.Info("Superuser secret ref persisted to status")
}

Expand Down Expand Up @@ -223,6 +228,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl.
return ctrl.Result{}, err
}
rc.emitNormal(postgresCluster, EventClusterCreationStarted, "CNPG cluster created, waiting for healthy state")
rc.Metrics.IncOwnedResourceOp(pgmetrics.ControllerCluster, pgmetrics.ResourceCluster, pgmetrics.OpCreate, pgmetrics.ResultSuccess)
if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildSucceeded,
"CNPG Cluster created", pendingClusterPhase); statusErr != nil {
logger.Error(statusErr, "Failed to update status")
Expand Down Expand Up @@ -267,6 +273,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl.
return ctrl.Result{Requeue: true}, nil
}
rc.emitNormal(postgresCluster, EventClusterUpdateStarted, "CNPG cluster spec updated, waiting for healthy state")
rc.Metrics.IncOwnedResourceOp(pgmetrics.ControllerCluster, pgmetrics.ResourceCluster, pgmetrics.OpUpdate, pgmetrics.ResultSuccess)
logger.Info("CNPG Cluster patched, requeueing for status update", "name", cnpgCluster.Name)
return ctrl.Result{RequeueAfter: retryDelay}, nil
}
Expand All @@ -276,6 +283,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl.
if err := reconcileManagedRoles(ctx, c, postgresCluster, cnpgCluster); err != nil {
logger.Error(err, "Failed to reconcile managed roles")
rc.emitWarning(postgresCluster, EventManagedRolesFailed, fmt.Sprintf("Failed to reconcile managed roles: %v", err))
rc.Metrics.IncUserAction(pgmetrics.ActionRolePatch, pgmetrics.ResultError)
if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonManagedRolesFailed,
fmt.Sprintf("Failed to reconcile managed roles: %v", err), failedClusterPhase); statusErr != nil {
logger.Error(statusErr, "Failed to update status")
Expand Down Expand Up @@ -326,6 +334,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl.
if mergedConfig.CNPG == nil || mergedConfig.CNPG.ConnectionPooler == nil {
logger.Info("Connection pooler enabled but no config found in class or cluster spec, skipping",
"class", postgresCluster.Spec.Class, "cluster", postgresCluster.Name)
rc.Metrics.IncValidationFailure(pgmetrics.ControllerCluster, pgmetrics.ReasonPoolerConfigMissing)
if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerConfigMissing,
fmt.Sprintf("Connection pooler is enabled but no config found in class %q or cluster %q",
postgresCluster.Spec.Class, postgresCluster.Name), failedClusterPhase); statusErr != nil {
Expand All @@ -351,6 +360,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl.
return ctrl.Result{}, err
}
rc.emitNormal(postgresCluster, EventPoolerCreationStarted, "Connection poolers created, waiting for readiness")
rc.Metrics.IncOwnedResourceOp(pgmetrics.ControllerCluster, pgmetrics.ResourcePooler, pgmetrics.OpCreate, pgmetrics.ResultSuccess)
logger.Info("Connection pooler creation started, requeueing")
if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating,
"Connection poolers are being provisioned", provisioningClusterPhase); statusErr != nil {
Expand Down Expand Up @@ -433,9 +443,11 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl.
switch createOrUpdateResult {
case controllerutil.OperationResultCreated:
rc.emitNormal(postgresCluster, EventConfigMapReady, fmt.Sprintf("ConfigMap %s created", desiredCM.Name))
rc.Metrics.IncOwnedResourceOp(pgmetrics.ControllerCluster, pgmetrics.ResourceConfigMap, pgmetrics.OpCreate, pgmetrics.ResultSuccess)
logger.Info("ConfigMap created", "name", desiredCM.Name)
case controllerutil.OperationResultUpdated:
rc.emitNormal(postgresCluster, EventConfigMapReady, fmt.Sprintf("ConfigMap %s updated", desiredCM.Name))
rc.Metrics.IncOwnedResourceOp(pgmetrics.ControllerCluster, pgmetrics.ResourceConfigMap, pgmetrics.OpUpdate, pgmetrics.ResultSuccess)
logger.Info("ConfigMap updated", "name", desiredCM.Name)
default:
logger.Info("ConfigMap unchanged", "name", desiredCM.Name)
Expand Down Expand Up @@ -1062,6 +1074,7 @@ func handleFinalizer(ctx context.Context, rc *ReconcileContext, cluster *enterpr
return fmt.Errorf("removing finalizer: %w", err)
}
rc.emitNormal(cluster, EventCleanupComplete, fmt.Sprintf("Cleanup complete (policy: %s)", policy))
rc.Metrics.IncFinalizerOp(pgmetrics.ControllerCluster, pgmetrics.ResultSuccess)
logger.Info("Finalizer removed, cleanup complete")
return nil
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/postgresql/cluster/core/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"time"

enterprisev4 "github.com/splunk/splunk-operator/api/v4"
pgmetrics "github.com/splunk/splunk-operator/pkg/postgresql/metrics"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
Expand All @@ -17,6 +18,7 @@ type ReconcileContext struct {
Client client.Client
Scheme *runtime.Scheme
Recorder record.EventRecorder
Metrics pgmetrics.Recorder
}

// normalizedCNPGClusterSpec is a subset of cnpgv1.ClusterSpec fields used for drift detection.
Expand Down
Loading
Loading