// FeatureDisableOverride lets a PostgresCluster opt out of a single feature
// that its PostgresClusterClass enables. It can only turn a feature off; it
// carries no way to enable a feature the class leaves disabled.
type FeatureDisableOverride struct {
	// Disabled set to true will disable the feature even if it's enabled in the class.
	// When unset or false, the class setting applies unchanged.
	// +kubebuilder:default=false
	// +optional
	Disabled *bool `json:"disabled,omitempty"`
}
// PostgresObservabilityClassConfig groups the class-level metrics settings,
// one entry per scrape target (PostgreSQL and PgBouncer).
type PostgresObservabilityClassConfig struct {
	// PostgreSQL holds the metrics settings for the PostgreSQL target.
	// +optional
	PostgreSQL *MetricsClassConfig `json:"postgresql,omitempty"`
	// PgBouncer holds the metrics settings for the PgBouncer target.
	// +optional
	PgBouncer *MetricsClassConfig `json:"pgbouncer,omitempty"`
}
a/docs/PostgreSQLObservabilityDashboard.json b/docs/PostgreSQLObservabilityDashboard.json new file mode 100644 index 000000000..aa0ffc765 --- /dev/null +++ b/docs/PostgreSQLObservabilityDashboard.json @@ -0,0 +1,928 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "count(count by (pod) (cnpg_pg_postmaster_start_time_seconds{namespace=\"$namespace\",service=\"$cluster-postgres-metrics\"}))", + "legendFormat": "postgres pods", + "range": true, + "refId": "A" + } + ], + "title": "PostgreSQL Targets", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "index": 0, + "text": "Down" + }, + "1": { + "index": 1, + "text": "Up" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 2, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "max(cnpg_pgbouncer_up{namespace=\"$namespace\",service=\"$cluster-pooler-rw-metrics\"})", + "legendFormat": "rw", + "range": true, + "refId": "A" + } + ], + "title": "RW Pooler", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "index": 0, + "text": "Down" + }, + "1": { + "index": 1, + "text": "Up" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 3, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "max(cnpg_pgbouncer_up{namespace=\"$namespace\",service=\"$cluster-pooler-ro-metrics\"})", + "legendFormat": "ro", + "range": true, + "refId": "A" + } + ], + "title": "RO Pooler", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 4, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(cnpg_pg_stat_archiver_archived_count{namespace=\"$namespace\",service=\"$cluster-postgres-metrics\"}[5m]))", + "legendFormat": "archive rate", + "range": true, + "refId": "A" + } + ], + "title": "Archive Rate", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 5, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(splunk_operator_postgres_databases{phase=\"Failed\"})", + "legendFormat": "failed", + "range": true, + "refId": "A" + } + ], + "title": "Failed Databases", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + 
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 4 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum by (datname) (cnpg_pg_database_size_bytes{namespace=\"$namespace\",service=\"$cluster-postgres-metrics\"})", + "legendFormat": "{{datname}}", + "range": true, + "refId": "A" + } + ], + "title": "Database Size by Database", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 4 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum by (service) (cnpg_pgbouncer_pools_cl_active{namespace=\"$namespace\",service=~\"$cluster-pooler-(rw|ro)-metrics\"})", + "legendFormat": "{{service}} active", + "range": true, + "refId": "A" + }, + { + "editorMode": "code", + "expr": "sum by (service) (cnpg_pgbouncer_pools_cl_waiting{namespace=\"$namespace\",service=~\"$cluster-pooler-(rw|ro)-metrics\"})", + "legendFormat": "{{service}} waiting", + "range": true, + "refId": "B" + } + ], + "title": "PgBouncer Client Load", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.25 + } + ] + }, + 
"unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 4 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(cnpg_pg_stat_archiver_archived_count{namespace=\"$namespace\",service=\"$cluster-postgres-metrics\"}[5m]))", + "legendFormat": "archived WAL / sec", + "range": true, + "refId": "A" + }, + { + "editorMode": "code", + "expr": "sum(cnpg_pg_wal_files_total{namespace=\"$namespace\",service=\"$cluster-postgres-metrics\"})", + "legendFormat": "wal files total", + "range": true, + "refId": "B" + } + ], + "title": "WAL Activity", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + 
"pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum by (phase) (splunk_operator_postgres_databases)", + "legendFormat": "{{phase}}", + "range": true, + "refId": "A" + } + ], + "title": "Fleet Database Phases", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum by (controller, result) (rate(splunk_operator_postgres_reconcile_total[5m]))", + "legendFormat": "{{controller}} {{result}}", + "range": true, + "refId": "A" + }, + { + "editorMode": "code", + "expr": "sum by (controller, error_class) (rate(splunk_operator_postgres_reconcile_errors_total[5m]))", + "legendFormat": "{{controller}} errors {{error_class}}", + "range": true, + "refId": "B" + } + ], + "title": "Controller Reconcile Activity", + "type": "timeseries" + } + ], + "refresh": 
"30s", + "schemaVersion": 39, + "style": "dark", + "tags": [ + "postgresql", + "cnpg", + "pgbouncer", + "splunk-operator", + "reference" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "Datasource", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(cnpg_pg_postmaster_start_time_seconds, namespace)", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(cnpg_pg_postmaster_start_time_seconds, namespace)", + "refId": "Prometheus-namespace" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(cnpg_pg_postmaster_start_time_seconds{namespace=\"$namespace\"}, service)", + "hide": 0, + "includeAll": false, + "label": "Cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": { + "query": "label_values(cnpg_pg_postmaster_start_time_seconds{namespace=\"$namespace\"}, service)", + "refId": "Prometheus-cluster" + }, + "refresh": 2, + "regex": "/(.*)-postgres-metrics/", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "PostgreSQL Observability Reference", + "uid": "postgresql-observability-reference", + "version": 1, + "weekStart": "" +} diff --git a/docs/PostgreSQLObservabilityDashboard.md 
b/docs/PostgreSQLObservabilityDashboard.md new file mode 100644 index 000000000..22343afff --- /dev/null +++ b/docs/PostgreSQLObservabilityDashboard.md @@ -0,0 +1,65 @@ +# PostgreSQL Observability Dashboard Example + +This file provides a reference Grafana dashboard for the PostgreSQL observability model described in the PostgreSQL observability notes. + +The dashboard JSON lives at: + +- [PostgreSQLObservabilityDashboard.json](./PostgreSQLObservabilityDashboard.json) + +## Purpose + +This dashboard is a reference artifact only. + +It is meant to show how a Grafana dashboard could combine: + +- runtime PostgreSQL and PgBouncer metrics exposed through the `PostgresCluster` observability path +- controller metrics emitted by the PostgreSQL controllers + +It is not meant to imply that Grafana runtime resources are managed by the operator. + +## Panels Included + +The sample dashboard includes: + +- PostgreSQL target count +- RW and RO PgBouncer availability +- WAL archive rate +- failed `PostgresDatabase` count +- database size by database +- PgBouncer active and waiting clients +- WAL activity +- fleet database phases +- controller reconcile activity and errors + +## Assumptions + +The sample queries assume: + +- Prometheus is scraping the PostgreSQL metrics `Service` created by the `PostgresCluster` controller +- Prometheus is scraping the PgBouncer metrics `Service` objects created for RW and RO poolers +- Prometheus series include `namespace` and `service` labels +- the cluster metrics service is named `<cluster>-postgres-metrics` +- the PgBouncer metrics services are named `<cluster>-pooler-rw-metrics` and `<cluster>-pooler-ro-metrics` +- the controller metrics branch is present for the `splunk_operator_postgres_*` metrics + +If your Prometheus relabeling differs, you may need to adjust the dashboard queries. + +## Import Notes + +To use the dashboard: + +1. Import the JSON file into Grafana. +2. Select the correct Prometheus datasource. +3. Choose the namespace. +4.
Choose the cluster name using the derived `cluster` variable. + +## Notes On Candidate Metrics + +Some PgBouncer queries in the sample use metrics that are good candidates but should still be verified against actual exporter output in the merged branch: + +- `cnpg_pgbouncer_pools_cl_waiting` +- `cnpg_pgbouncer_pools_maxwait` +- `cnpg_pgbouncer_stats_avg_wait_time` +- `cnpg_pgbouncer_stats_total_wait_time` + +If those exact series are not present, keep the panel shape and replace the query with the actual exported metric name. diff --git a/internal/controller/postgrescluster_controller.go b/internal/controller/postgrescluster_controller.go index 70b11c9e6..55e04ff99 100644 --- a/internal/controller/postgrescluster_controller.go +++ b/internal/controller/postgrescluster_controller.go @@ -45,7 +45,7 @@ type PostgresClusterReconciler struct { Scheme *runtime.Scheme Recorder record.EventRecorder } - +// +kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters/status,verbs=get;update;patch // +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters/finalizers,verbs=update diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 9356a011f..8518541be 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -39,6 +39,7 @@ import ( clientgoscheme "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3" enterpriseApi "github.com/splunk/splunk-operator/api/v4" //+kubebuilder:scaffold:imports @@ -109,6 +110,9 @@ var _ = BeforeSuite(func(ctx context.Context) { err = 
enterpriseApi.AddToScheme(clientgoscheme.Scheme) Expect(err).NotTo(HaveOccurred()) + err = monitoringv1.AddToScheme(clientgoscheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + //+kubebuilder:scaffold:scheme // Create New Manager for controller diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 3334011c6..77846a8e4 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -396,6 +396,37 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. rc.emitPoolerReadyTransition(postgresCluster, oldConditions) } + if err := reconcilePostgreSQLMetricsService(ctx, c, rc.Scheme, postgresCluster, isPostgreSQLMetricsEnabled(postgresCluster, clusterClass)); err != nil { + return ctrl.Result{}, err + } + + poolerMetricsEnabled := isConnectionPoolerMetricsEnabled(postgresCluster, clusterClass) + rwPoolerMetricsEnabled := poolerMetricsEnabled && rwPoolerExists + roPoolerMetricsEnabled := poolerMetricsEnabled && roPoolerExists + if err := reconcileConnectionPoolerMetricsService(ctx, c, rc.Scheme, postgresCluster, readWriteEndpoint, rwPoolerMetricsEnabled); err != nil { + return ctrl.Result{}, err + } + if err := reconcileConnectionPoolerMetricsService(ctx, c, rc.Scheme, postgresCluster, readOnlyEndpoint, roPoolerMetricsEnabled); err != nil { + return ctrl.Result{}, err + } + + if err := reconcilePostgreSQLMetricsServiceMonitor( + ctx, c, rc.Scheme, postgresCluster, isPostgreSQLMetricsEnabled(postgresCluster, clusterClass), + ); err != nil { + return ctrl.Result{}, err + } + + if err := reconcileConnectionPoolerMetricsServiceMonitor( + ctx, c, rc.Scheme, postgresCluster, readWriteEndpoint, rwPoolerMetricsEnabled, + ); err != nil { + return ctrl.Result{}, err + } + if err := reconcileConnectionPoolerMetricsServiceMonitor( + ctx, c, rc.Scheme, postgresCluster, readOnlyEndpoint, roPoolerMetricsEnabled, + ); err != nil { + return ctrl.Result{}, err + } + // Reconcile 
ConfigMap when CNPG cluster is healthy. if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy { logger.Info("CNPG Cluster healthy, reconciling ConfigMap") diff --git a/pkg/postgresql/cluster/core/monitoring.go b/pkg/postgresql/cluster/core/monitoring.go new file mode 100644 index 000000000..c2f1267f8 --- /dev/null +++ b/pkg/postgresql/cluster/core/monitoring.go @@ -0,0 +1,483 @@ +package core + +import ( + "context" + "fmt" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + enterprisev4 "github.com/splunk/splunk-operator/api/v4" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +const ( + // metrics + postgresMetricsServiceSuffix = "-postgres-metrics" + postgresMetricsPortName = "metrics" + postgresMetricsPort = int32(9187) + poolerMetricsPortName = "metrics" + poolerMetricsPort = int32(9127) + + // labels + labelManagedBy = "app.kubernetes.io/managed-by" + labelManagedByValue = "postgrescluster-controller" + labelObservabilityComponent = "enterprise.splunk.com/observability-component" + cnpgClusterLabelName = "cnpg.io/cluster" + cnpgPoolerNameLabel = "cnpg.io/poolerName" + cnpgPodRoleInstance = "instance" + cnpgPodRoleLabelName = "cnpg.io/podRole" +) + +func isPostgreSQLMetricsEnabled(cluster *enterprisev4.PostgresCluster, class *enterprisev4.PostgresClusterClass) bool { + if class == nil || class.Spec.Config == nil || class.Spec.Config.Observability == nil { + return false + } + classCfg := class.Spec.Config.Observability.PostgreSQL + if classCfg == nil || classCfg.Enabled == nil || !*classCfg.Enabled { + return false + } + if cluster == nil || 
cluster.Spec.Observability == nil || cluster.Spec.Observability.PostgreSQL == nil { + return true + } + override := cluster.Spec.Observability.PostgreSQL.Disabled + return override == nil || !*override +} + +func isConnectionPoolerEnabled(cluster *enterprisev4.PostgresCluster, class *enterprisev4.PostgresClusterClass) bool { + if class == nil || class.Spec.Config == nil || class.Spec.Config.ConnectionPoolerEnabled == nil { + return false + } + if !*class.Spec.Config.ConnectionPoolerEnabled { + return false + } + if cluster == nil || cluster.Spec.ConnectionPoolerEnabled == nil { + return true + } + return *cluster.Spec.ConnectionPoolerEnabled +} + +func isConnectionPoolerMetricsEnabled(cluster *enterprisev4.PostgresCluster, class *enterprisev4.PostgresClusterClass) bool { + if !isConnectionPoolerEnabled(cluster, class) { + return false + } + if class == nil || class.Spec.Config == nil || class.Spec.Config.Observability == nil { + return false + } + classCfg := class.Spec.Config.Observability.PgBouncer + if classCfg == nil || classCfg.Enabled == nil || !*classCfg.Enabled { + return false + } + if cluster == nil || cluster.Spec.Observability == nil || cluster.Spec.Observability.PgBouncer == nil { + return true + } + override := cluster.Spec.Observability.PgBouncer.Disabled + return override == nil || !*override +} + +func buildPostgreSQLMetricsService(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster) (*corev1.Service, error) { + svc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + postgresMetricsServiceSuffix, + Namespace: cluster.Namespace, + Labels: map[string]string{ + labelManagedBy: labelManagedByValue, + labelObservabilityComponent: "postgresql-metrics", + cnpgClusterLabelName: cluster.Name, + }, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeClusterIP, + Selector: map[string]string{ + cnpgClusterLabelName: cluster.Name, + cnpgPodRoleLabelName: cnpgPodRoleInstance, + }, + Ports: []corev1.ServicePort{ + { + 
// poolerMetricsServiceName returns the name of the PgBouncer metrics Service
// for one pooler of a cluster, in the form "<cluster>-pooler-<type>-metrics".
func poolerMetricsServiceName(clusterName, poolerType string) string {
	const nameFormat = "%s-pooler-%s-metrics"
	return fmt.Sprintf(nameFormat, clusterName, poolerType)
}
Namespace: cluster.Namespace}, existing) + switch { + case apierrors.IsNotFound(err): + return nil + case err != nil: + return fmt.Errorf("getting PostgreSQL metrics Service %s: %w", serviceName, err) + } + + logger.Info("Deleting PostgreSQL metrics Service", "name", serviceName) + if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("deleting PostgreSQL metrics Service %s: %w", serviceName, err) + } + return nil + } + + desired, err := buildPostgreSQLMetricsService(scheme, cluster) + if err != nil { + return fmt.Errorf("building PostgreSQL metrics Service: %w", err) + } + + live := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: desired.Name, + Namespace: desired.Namespace, + }, + } + + _, err = controllerutil.CreateOrUpdate(ctx, c, live, func() error { + live.Labels = desired.Labels + live.Annotations = desired.Annotations + live.Spec.Type = desired.Spec.Type + live.Spec.Selector = desired.Spec.Selector + live.Spec.Ports = desired.Spec.Ports + + if !metav1.IsControlledBy(live, cluster) { + if err := ctrl.SetControllerReference(cluster, live, scheme); err != nil { + return fmt.Errorf("setting controller reference on PostgreSQL metrics Service: %w", err) + } + } + return nil + }) + if err != nil { + return fmt.Errorf("reconciling PostgreSQL metrics Service %s: %w", desired.Name, err) + } + + return nil +} + +func reconcileConnectionPoolerMetricsService( + ctx context.Context, + c client.Client, + scheme *runtime.Scheme, + cluster *enterprisev4.PostgresCluster, + poolerType string, + enabled bool, +) error { + logger := log.FromContext(ctx) + serviceName := poolerMetricsServiceName(cluster.Name, poolerType) + + if !enabled { + existing := &corev1.Service{} + err := c.Get(ctx, types.NamespacedName{Name: serviceName, Namespace: cluster.Namespace}, existing) + switch { + case apierrors.IsNotFound(err): + return nil + case err != nil: + return fmt.Errorf("getting PgBouncer metrics Service %s: %w", serviceName, err) 
+ } + + logger.Info("Deleting PgBouncer metrics Service", "name", serviceName, "poolerType", poolerType) + if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("deleting PgBouncer metrics Service %s: %w", serviceName, err) + } + return nil + } + + desired, err := buildConnectionPoolerMetricsService(scheme, cluster, poolerType) + if err != nil { + return fmt.Errorf("building PgBouncer metrics Service for %s pooler: %w", poolerType, err) + } + + live := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: desired.Name, + Namespace: desired.Namespace, + }, + } + + _, err = controllerutil.CreateOrUpdate(ctx, c, live, func() error { + live.Labels = desired.Labels + live.Annotations = desired.Annotations + live.Spec.Type = desired.Spec.Type + live.Spec.Selector = desired.Spec.Selector + live.Spec.Ports = desired.Spec.Ports + + if !metav1.IsControlledBy(live, cluster) { + if err := ctrl.SetControllerReference(cluster, live, scheme); err != nil { + return fmt.Errorf("setting controller reference on PgBouncer metrics Service: %w", err) + } + } + return nil + }) + if err != nil { + return fmt.Errorf("reconciling PgBouncer metrics Service %s: %w", desired.Name, err) + } + + return nil +} + +func postgresMetricsServiceMonitorName(clusterName string) string { + return clusterName + "-postgres-metrics-monitor" +} + +func poolerMetricsServiceMonitorName(clusterName, poolerType string) string { + return fmt.Sprintf("%s-pooler-%s-metrics-monitor", clusterName, poolerType) +} + +func buildPostgreSQLMetricsServiceMonitor( + scheme *runtime.Scheme, + cluster *enterprisev4.PostgresCluster, +) (*monitoringv1.ServiceMonitor, error) { + sm := &monitoringv1.ServiceMonitor{ + ObjectMeta: metav1.ObjectMeta{ + Name: postgresMetricsServiceMonitorName(cluster.Name), + Namespace: cluster.Namespace, + Labels: map[string]string{ + labelManagedBy: labelManagedByValue, + labelObservabilityComponent: "postgresql-metrics", + cnpgClusterLabelName: 
cluster.Name, + }, + }, + Spec: monitoringv1.ServiceMonitorSpec{ + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + labelObservabilityComponent: "postgresql-metrics", + cnpgClusterLabelName: cluster.Name, + }, + }, + Endpoints: []monitoringv1.Endpoint{ + { + Port: postgresMetricsPortName, + Path: "/metrics", + Scheme: "http", + }, + }, + }, + } + + if err := ctrl.SetControllerReference(cluster, sm, scheme); err != nil { + return nil, fmt.Errorf("setting controller reference on PostgreSQL ServiceMonitor: %w", err) + } + + return sm, nil +} + +func buildConnectionPoolerMetricsServiceMonitor( + scheme *runtime.Scheme, + cluster *enterprisev4.PostgresCluster, + poolerType string, +) (*monitoringv1.ServiceMonitor, error) { + poolerName := poolerResourceName(cluster.Name, poolerType) + + sm := &monitoringv1.ServiceMonitor{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolerMetricsServiceMonitorName(cluster.Name, poolerType), + Namespace: cluster.Namespace, + Labels: map[string]string{ + labelManagedBy: labelManagedByValue, + labelObservabilityComponent: "pgbouncer-metrics", + cnpgClusterLabelName: cluster.Name, + cnpgPoolerNameLabel: poolerName, + }, + }, + Spec: monitoringv1.ServiceMonitorSpec{ + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + labelObservabilityComponent: "pgbouncer-metrics", + cnpgClusterLabelName: cluster.Name, + cnpgPoolerNameLabel: poolerName, + }, + }, + Endpoints: []monitoringv1.Endpoint{ + { + Port: poolerMetricsPortName, + Path: "/metrics", + Scheme: "http", + }, + }, + }, + } + + if err := ctrl.SetControllerReference(cluster, sm, scheme); err != nil { + return nil, fmt.Errorf("setting controller reference on PgBouncer ServiceMonitor: %w", err) + } + + return sm, nil +} + +func reconcilePostgreSQLMetricsServiceMonitor( + ctx context.Context, + c client.Client, + scheme *runtime.Scheme, + cluster *enterprisev4.PostgresCluster, + enabled bool, +) error { + logger := log.FromContext(ctx) + name := 
postgresMetricsServiceMonitorName(cluster.Name) + + if !enabled { + existing := &monitoringv1.ServiceMonitor{} + err := c.Get(ctx, types.NamespacedName{Name: name, Namespace: cluster.Namespace}, existing) + switch { + case apierrors.IsNotFound(err): + return nil + case err != nil: + return fmt.Errorf("getting PostgreSQL ServiceMonitor %s: %w", name, err) + } + + logger.Info("Deleting PostgreSQL ServiceMonitor", "name", name) + if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("deleting PostgreSQL ServiceMonitor %s: %w", name, err) + } + return nil + } + + desired, err := buildPostgreSQLMetricsServiceMonitor(scheme, cluster) + if err != nil { + return fmt.Errorf("building PostgreSQL ServiceMonitor: %w", err) + } + + live := &monitoringv1.ServiceMonitor{ + ObjectMeta: metav1.ObjectMeta{ + Name: desired.Name, + Namespace: desired.Namespace, + }, + } + + _, err = controllerutil.CreateOrUpdate(ctx, c, live, func() error { + live.Labels = desired.Labels + live.Annotations = desired.Annotations + live.Spec = desired.Spec + + if !metav1.IsControlledBy(live, cluster) { + if err := ctrl.SetControllerReference(cluster, live, scheme); err != nil { + return fmt.Errorf("setting controller reference on PostgreSQL ServiceMonitor: %w", err) + } + } + return nil + }) + if err != nil { + return fmt.Errorf("reconciling PostgreSQL ServiceMonitor %s: %w", desired.Name, err) + } + + return nil +} + +func reconcileConnectionPoolerMetricsServiceMonitor( + ctx context.Context, + c client.Client, + scheme *runtime.Scheme, + cluster *enterprisev4.PostgresCluster, + poolerType string, + enabled bool, +) error { + logger := log.FromContext(ctx) + name := poolerMetricsServiceMonitorName(cluster.Name, poolerType) + + if !enabled { + existing := &monitoringv1.ServiceMonitor{} + err := c.Get(ctx, types.NamespacedName{Name: name, Namespace: cluster.Namespace}, existing) + switch { + case apierrors.IsNotFound(err): + return nil + case err != nil: + 
return fmt.Errorf("getting PgBouncer ServiceMonitor %s: %w", name, err) + } + + logger.Info("Deleting PgBouncer ServiceMonitor", "name", name, "poolerType", poolerType) + if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("deleting PgBouncer ServiceMonitor %s: %w", name, err) + } + return nil + } + + desired, err := buildConnectionPoolerMetricsServiceMonitor(scheme, cluster, poolerType) + if err != nil { + return fmt.Errorf("building PgBouncer ServiceMonitor for %s pooler: %w", poolerType, err) + } + + live := &monitoringv1.ServiceMonitor{ + ObjectMeta: metav1.ObjectMeta{ + Name: desired.Name, + Namespace: desired.Namespace, + }, + } + + _, err = controllerutil.CreateOrUpdate(ctx, c, live, func() error { + live.Labels = desired.Labels + live.Annotations = desired.Annotations + live.Spec = desired.Spec + + if !metav1.IsControlledBy(live, cluster) { + if err := ctrl.SetControllerReference(cluster, live, scheme); err != nil { + return fmt.Errorf("setting controller reference on PgBouncer ServiceMonitor: %w", err) + } + } + return nil + }) + if err != nil { + return fmt.Errorf("reconciling PgBouncer ServiceMonitor %s: %w", desired.Name, err) + } + + return nil +} diff --git a/pkg/postgresql/cluster/core/monitoring_unit_test.go b/pkg/postgresql/cluster/core/monitoring_unit_test.go new file mode 100644 index 000000000..545ea25da --- /dev/null +++ b/pkg/postgresql/cluster/core/monitoring_unit_test.go @@ -0,0 +1,339 @@ +package core + +import ( + "testing" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + enterprisev4 "github.com/splunk/splunk-operator/api/v4" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/utils/ptr" +) + +func TestIsPostgreSQLMetricsEnabled(t *testing.T) { + tests := []struct { + name string + cluster 
*enterprisev4.PostgresCluster + class *enterprisev4.PostgresClusterClass + want bool + }{ + { + name: "disabled when class observability is absent", + class: &enterprisev4.PostgresClusterClass{ + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{}, + }, + }, + want: false, + }, + { + name: "enabled when class enables and cluster override is unset", + cluster: &enterprisev4.PostgresCluster{}, + class: newClassWithObservability( + ptr.To(true), + nil, + nil, + ), + want: true, + }, + { + name: "disabled when cluster override disables", + cluster: &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + Observability: &enterprisev4.PostgresObservabilityOverride{ + PostgreSQL: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(true)}, + }, + }, + }, + class: newClassWithObservability( + ptr.To(true), + nil, + nil, + ), + want: false, + }, + { + name: "disabled when class disables even if cluster has override struct", + cluster: &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + Observability: &enterprisev4.PostgresObservabilityOverride{ + PostgreSQL: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(false)}, + }, + }, + }, + class: newClassWithObservability( + ptr.To(false), + nil, + nil, + ), + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isPostgreSQLMetricsEnabled(tt.cluster, tt.class) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestIsConnectionPoolerEnabled(t *testing.T) { + tests := []struct { + name string + cluster *enterprisev4.PostgresCluster + class *enterprisev4.PostgresClusterClass + want bool + }{ + { + name: "disabled when class config is absent", + class: &enterprisev4.PostgresClusterClass{}, + want: false, + }, + { + name: "inherits enabled class setting when cluster override is unset", + cluster: &enterprisev4.PostgresCluster{}, + class: &enterprisev4.PostgresClusterClass{ + Spec: 
enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + }, + want: true, + }, + { + name: "cluster can disable class enabled pooler", + cluster: &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ConnectionPoolerEnabled: ptr.To(false), + }, + }, + class: &enterprisev4.PostgresClusterClass{ + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + }, + want: false, + }, + { + name: "class disabled wins", + cluster: &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + class: &enterprisev4.PostgresClusterClass{ + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(false), + }, + }, + }, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isConnectionPoolerEnabled(tt.cluster, tt.class) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestIsConnectionPoolerMetricsEnabled(t *testing.T) { + tests := []struct { + name string + cluster *enterprisev4.PostgresCluster + class *enterprisev4.PostgresClusterClass + want bool + }{ + { + name: "disabled when pooler itself is disabled", + cluster: &enterprisev4.PostgresCluster{}, + class: newClassWithObservability( + nil, + ptr.To(true), + nil, + ), + want: false, + }, + { + name: "enabled when pooler and pgbouncer metrics are enabled", + cluster: &enterprisev4.PostgresCluster{}, + class: newClassWithObservability( + nil, + ptr.To(true), + ptr.To(true), + ), + want: true, + }, + { + name: "disabled when cluster override disables pgbouncer metrics", + cluster: &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + Observability: &enterprisev4.PostgresObservabilityOverride{ + PgBouncer: 
&enterprisev4.FeatureDisableOverride{Disabled: ptr.To(true)}, + }, + }, + }, + class: newClassWithObservability( + nil, + ptr.To(true), + ptr.To(true), + ), + want: false, + }, + { + name: "disabled when class disables pgbouncer metrics", + cluster: &enterprisev4.PostgresCluster{}, + class: newClassWithObservability( + nil, + ptr.To(true), + ptr.To(false), + ), + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isConnectionPoolerMetricsEnabled(tt.cluster, tt.class) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestBuildPostgreSQLMetricsService(t *testing.T) { + scheme := newMonitoringTestScheme(t) + cluster := newTestMonitoringCluster() + + svc, err := buildPostgreSQLMetricsService(scheme, cluster) + require.NoError(t, err) + + assert.Equal(t, "postgresql-cluster-dev-postgres-metrics", svc.Name) + assert.Equal(t, cluster.Namespace, svc.Namespace) + assert.Equal(t, "postgresql-metrics", svc.Labels[labelObservabilityComponent]) + assert.Equal(t, cluster.Name, svc.Labels[cnpgClusterLabelName]) + assert.Equal(t, cluster.Name, svc.Spec.Selector[cnpgClusterLabelName]) + assert.Equal(t, cnpgPodRoleInstance, svc.Spec.Selector[cnpgPodRoleLabelName]) + require.Len(t, svc.Spec.Ports, 1) + assert.Equal(t, postgresMetricsPortName, svc.Spec.Ports[0].Name) + assert.Equal(t, postgresMetricsPort, svc.Spec.Ports[0].Port) + assert.Equal(t, postgresMetricsPortName, svc.Spec.Ports[0].TargetPort.StrVal) + assertMonitoringOwnerRef(t, svc.OwnerReferences, cluster) +} + +func TestBuildConnectionPoolerMetricsService(t *testing.T) { + scheme := newMonitoringTestScheme(t) + cluster := newTestMonitoringCluster() + + svc, err := buildConnectionPoolerMetricsService(scheme, cluster, readWriteEndpoint) + require.NoError(t, err) + + assert.Equal(t, "postgresql-cluster-dev-pooler-rw-metrics", svc.Name) + assert.Equal(t, "pgbouncer-metrics", svc.Labels[labelObservabilityComponent]) + assert.Equal(t, poolerResourceName(cluster.Name, 
readWriteEndpoint), svc.Labels[cnpgPoolerNameLabel]) + assert.Equal(t, poolerResourceName(cluster.Name, readWriteEndpoint), svc.Spec.Selector[cnpgPoolerNameLabel]) + require.Len(t, svc.Spec.Ports, 1) + assert.Equal(t, poolerMetricsPortName, svc.Spec.Ports[0].Name) + assert.Equal(t, poolerMetricsPort, svc.Spec.Ports[0].Port) + assert.Equal(t, poolerMetricsPortName, svc.Spec.Ports[0].TargetPort.StrVal) + assertMonitoringOwnerRef(t, svc.OwnerReferences, cluster) +} + +func TestBuildPostgreSQLMetricsServiceMonitor(t *testing.T) { + scheme := newMonitoringTestScheme(t) + cluster := newTestMonitoringCluster() + + sm, err := buildPostgreSQLMetricsServiceMonitor(scheme, cluster) + require.NoError(t, err) + + assert.Equal(t, "postgresql-cluster-dev-postgres-metrics-monitor", sm.Name) + assert.Equal(t, "postgresql-metrics", sm.Labels[labelObservabilityComponent]) + assert.Equal(t, cluster.Name, sm.Spec.Selector.MatchLabels[cnpgClusterLabelName]) + require.Len(t, sm.Spec.Endpoints, 1) + assert.Equal(t, postgresMetricsPortName, sm.Spec.Endpoints[0].Port) + assert.Equal(t, "/metrics", sm.Spec.Endpoints[0].Path) + assert.Equal(t, "http", sm.Spec.Endpoints[0].Scheme) + assertMonitoringOwnerRef(t, sm.OwnerReferences, cluster) +} + +func TestBuildConnectionPoolerMetricsServiceMonitor(t *testing.T) { + scheme := newMonitoringTestScheme(t) + cluster := newTestMonitoringCluster() + + sm, err := buildConnectionPoolerMetricsServiceMonitor(scheme, cluster, readOnlyEndpoint) + require.NoError(t, err) + + assert.Equal(t, "postgresql-cluster-dev-pooler-ro-metrics-monitor", sm.Name) + assert.Equal(t, "pgbouncer-metrics", sm.Labels[labelObservabilityComponent]) + assert.Equal(t, poolerResourceName(cluster.Name, readOnlyEndpoint), sm.Labels[cnpgPoolerNameLabel]) + assert.Equal(t, poolerResourceName(cluster.Name, readOnlyEndpoint), sm.Spec.Selector.MatchLabels[cnpgPoolerNameLabel]) + require.Len(t, sm.Spec.Endpoints, 1) + assert.Equal(t, poolerMetricsPortName, sm.Spec.Endpoints[0].Port) + 
assert.Equal(t, "/metrics", sm.Spec.Endpoints[0].Path) + assert.Equal(t, "http", sm.Spec.Endpoints[0].Scheme) + assertMonitoringOwnerRef(t, sm.OwnerReferences, cluster) +} + +func newMonitoringTestScheme(t *testing.T) *runtime.Scheme { + t.Helper() + + scheme := runtime.NewScheme() + require.NoError(t, corev1.AddToScheme(scheme)) + require.NoError(t, monitoringv1.AddToScheme(scheme)) + require.NoError(t, enterprisev4.AddToScheme(scheme)) + + return scheme +} + +func newTestMonitoringCluster() *enterprisev4.PostgresCluster { + return &enterprisev4.PostgresCluster{ + TypeMeta: metav1.TypeMeta{ + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresCluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "postgresql-cluster-dev", + Namespace: "test", + UID: "cluster-uid", + }, + } +} + +func newClassWithObservability( + postgresEnabled *bool, + poolerEnabled *bool, + pgBouncerMetricsEnabled *bool, +) *enterprisev4.PostgresClusterClass { + return &enterprisev4.PostgresClusterClass{ + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: poolerEnabled, + Observability: &enterprisev4.PostgresObservabilityClassConfig{ + PostgreSQL: &enterprisev4.MetricsClassConfig{Enabled: postgresEnabled}, + PgBouncer: &enterprisev4.MetricsClassConfig{Enabled: pgBouncerMetricsEnabled}, + }, + }, + }, + } +} + +func assertMonitoringOwnerRef(t *testing.T, ownerRefs []metav1.OwnerReference, cluster *enterprisev4.PostgresCluster) { + t.Helper() + + require.Len(t, ownerRefs, 1) + assert.Equal(t, cluster.APIVersion, ownerRefs[0].APIVersion) + assert.Equal(t, cluster.Kind, ownerRefs[0].Kind) + assert.Equal(t, cluster.Name, ownerRefs[0].Name) + assert.Equal(t, cluster.UID, ownerRefs[0].UID) + require.NotNil(t, ownerRefs[0].Controller) + assert.True(t, *ownerRefs[0].Controller) +}