From ef819482646d7bbf82567aa21a0ee48db6ee82dc Mon Sep 17 00:00:00 2001
From: Guangning E
Date: Tue, 6 Jan 2026 17:44:47 +0800
Subject: [PATCH] Support use kubelet scrape container metrics

---
Review notes (text in this section is ignored by `git am`):

* Purpose: for both the sn-platform and sn-platform-slim charts, add a
  Prometheus scrape job named `kubelet` (node service discovery, path
  /metrics), grant the extra `nodes/metrics`, `nodes/stats` and `nodes/spec`
  resources in the RBAC rules, and add a `prometheus.kubelet.scheme` value.
  The job and the RBAC additions are rendered only when
  `.Values.prometheus.kubelet` is non-empty.
* Scheme selection compares with `eq` instead of Sprig `contains`.
  `contains` takes (substring, string), so the earlier form
  `contains .Values.prometheus.kubelet.scheme "http"` asked whether the
  literal "http" contains the configured value: an empty scheme selected
  plain HTTP on port 10255, and a value matching neither branch rendered
  no `scheme:` and no `replacement:` at all. Now exactly "http" selects
  HTTP/10255 and everything else (including unset) selects HTTPS/10250.
* NOTE(review): the HTTPS branch still sets `insecure_skip_verify: true`
  next to `ca_file`, as in the original change; confirm this is intended.
* NOTE(review): line breaks and indentation were reconstructed from a
  whitespace-collapsed copy of this patch. Verify context-line indentation
  against the tree before applying. The `index` blob ids are those of the
  original patch.

 .../prometheus/prometheus-configmap.yaml      | 43 +++++++++++++++++++
 .../prometheus/pulsar-operators-rbac.yaml     |  5 +++
 charts/sn-platform-slim/values.yaml           |  7 +++
 .../prometheus/prometheus-configmap.yaml      | 43 +++++++++++++++++++
 .../prometheus/pulsar-operators-rbac.yaml     |  5 +++
 charts/sn-platform/values.yaml                |  7 +++
 6 files changed, 110 insertions(+)

diff --git a/charts/sn-platform-slim/templates/prometheus/prometheus-configmap.yaml b/charts/sn-platform-slim/templates/prometheus/prometheus-configmap.yaml
index 36443c15c..a71cbf0bb 100644
--- a/charts/sn-platform-slim/templates/prometheus/prometheus-configmap.yaml
+++ b/charts/sn-platform-slim/templates/prometheus/prometheus-configmap.yaml
@@ -145,6 +145,49 @@ data:
         target_label: __metrics_path__
         replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
 {{- end }}
+{{- if .Values.prometheus.kubelet }}
+    - job_name: kubelet
+      honor_labels: true
+      kubernetes_sd_configs:
+      - role: node
+      scrape_interval: 60s
+      scrape_timeout: 20s
+      metrics_path: /metrics
+      {{- if eq (.Values.prometheus.kubelet.scheme | default "https") "http" }}
+      scheme: http
+      {{- else }}
+      scheme: https
+      authorization:
+        type: Bearer
+        credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+      tls_config:
+        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        insecure_skip_verify: true
+      {{- end }}
+      relabel_configs:
+      - source_labels:
+        - __meta_kubernetes_node_name
+        target_label: node
+      - regex: __meta_kubernetes_node_label_(.+)
+        action: labelmap
+      - source_labels:
+        - __address__
+        separator: ;
+        target_label: __address__
+        regex: ([^:]+)(?::\d+)?
+        {{- if eq (.Values.prometheus.kubelet.scheme | default "https") "http" }}
+        replacement: $1:10255
+        {{- else }}
+        replacement: $1:10250
+        {{- end }}
+        action: replace
+      - action: labeldrop
+        regex: "(eks|beta_kubernetes_io|cloud_google|iam_gke_io|gke|kubernetes_io|node_kubernetes_io|failure_domain|projectcalico)_.*"
+      metric_relabel_configs:
+      - source_labels: [__name__]
+        regex: "storage_operation_duration_seconds_bucket|kubelet_runtime_operations_duration_seconds_bucket|kubelet_http_requests_duration_seconds_bucket"
+        action: drop
+{{- end }}
 {{- if .Values.prometheus.extraScrapeConfigs -}}
 {{- with .Values.prometheus.extraScrapeConfigs }}
 {{ toYaml . | indent 4 }}
diff --git a/charts/sn-platform-slim/templates/prometheus/pulsar-operators-rbac.yaml b/charts/sn-platform-slim/templates/prometheus/pulsar-operators-rbac.yaml
index 8381bd3be..2fcc4866e 100644
--- a/charts/sn-platform-slim/templates/prometheus/pulsar-operators-rbac.yaml
+++ b/charts/sn-platform-slim/templates/prometheus/pulsar-operators-rbac.yaml
@@ -15,6 +15,11 @@ rules:
   resources:
   - nodes
   - nodes/proxy
+  {{- if .Values.prometheus.kubelet }}
+  - nodes/metrics
+  - nodes/stats
+  - nodes/spec
+  {{- end }}
   - services
   - endpoints
   - pods
diff --git a/charts/sn-platform-slim/values.yaml b/charts/sn-platform-slim/values.yaml
index 819e52b3d..65259c0d1 100644
--- a/charts/sn-platform-slim/values.yaml
+++ b/charts/sn-platform-slim/values.yaml
@@ -2026,6 +2026,13 @@ prometheus:
   customRelabelConfigs: []
   extraScrapeConfigs: []
   extraCustomArgs: {}
+  # Kubelet scraping
+  # Prometheus scrapes kubelet metrics directly from nodes while this map is non-empty; set `kubelet: null` to disable
+  kubelet:
+    # scheme: "https" or "http" (any value other than "http" is treated as "https")
+    # Use "https" for secure kubelet endpoints (default in most clusters)
+    # Use "http" for insecure kubelet endpoints (requires kubelet to be configured with --read-only-port)
+    scheme: "https"
 
   ## Prometheus service
   ## templates/prometheus-service.yaml
diff --git a/charts/sn-platform/templates/prometheus/prometheus-configmap.yaml b/charts/sn-platform/templates/prometheus/prometheus-configmap.yaml
index 36443c15c..a71cbf0bb 100644
--- a/charts/sn-platform/templates/prometheus/prometheus-configmap.yaml
+++ b/charts/sn-platform/templates/prometheus/prometheus-configmap.yaml
@@ -145,6 +145,49 @@ data:
         target_label: __metrics_path__
         replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
 {{- end }}
+{{- if .Values.prometheus.kubelet }}
+    - job_name: kubelet
+      honor_labels: true
+      kubernetes_sd_configs:
+      - role: node
+      scrape_interval: 60s
+      scrape_timeout: 20s
+      metrics_path: /metrics
+      {{- if eq (.Values.prometheus.kubelet.scheme | default "https") "http" }}
+      scheme: http
+      {{- else }}
+      scheme: https
+      authorization:
+        type: Bearer
+        credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+      tls_config:
+        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        insecure_skip_verify: true
+      {{- end }}
+      relabel_configs:
+      - source_labels:
+        - __meta_kubernetes_node_name
+        target_label: node
+      - regex: __meta_kubernetes_node_label_(.+)
+        action: labelmap
+      - source_labels:
+        - __address__
+        separator: ;
+        target_label: __address__
+        regex: ([^:]+)(?::\d+)?
+        {{- if eq (.Values.prometheus.kubelet.scheme | default "https") "http" }}
+        replacement: $1:10255
+        {{- else }}
+        replacement: $1:10250
+        {{- end }}
+        action: replace
+      - action: labeldrop
+        regex: "(eks|beta_kubernetes_io|cloud_google|iam_gke_io|gke|kubernetes_io|node_kubernetes_io|failure_domain|projectcalico)_.*"
+      metric_relabel_configs:
+      - source_labels: [__name__]
+        regex: "storage_operation_duration_seconds_bucket|kubelet_runtime_operations_duration_seconds_bucket|kubelet_http_requests_duration_seconds_bucket"
+        action: drop
+{{- end }}
 {{- if .Values.prometheus.extraScrapeConfigs -}}
 {{- with .Values.prometheus.extraScrapeConfigs }}
 {{ toYaml . | indent 4 }}
diff --git a/charts/sn-platform/templates/prometheus/pulsar-operators-rbac.yaml b/charts/sn-platform/templates/prometheus/pulsar-operators-rbac.yaml
index 8381bd3be..2fcc4866e 100644
--- a/charts/sn-platform/templates/prometheus/pulsar-operators-rbac.yaml
+++ b/charts/sn-platform/templates/prometheus/pulsar-operators-rbac.yaml
@@ -15,6 +15,11 @@ rules:
   resources:
   - nodes
   - nodes/proxy
+  {{- if .Values.prometheus.kubelet }}
+  - nodes/metrics
+  - nodes/stats
+  - nodes/spec
+  {{- end }}
   - services
   - endpoints
   - pods
diff --git a/charts/sn-platform/values.yaml b/charts/sn-platform/values.yaml
index 8a3408f3f..d4a4af315 100644
--- a/charts/sn-platform/values.yaml
+++ b/charts/sn-platform/values.yaml
@@ -2107,6 +2107,13 @@ prometheus:
   customRelabelConfigs: []
   extraScrapeConfigs: []
   extraCustomArgs: {}
+  # Kubelet scraping
+  # Prometheus scrapes kubelet metrics directly from nodes while this map is non-empty; set `kubelet: null` to disable
+  kubelet:
+    # scheme: "https" or "http" (any value other than "http" is treated as "https")
+    # Use "https" for secure kubelet endpoints (default in most clusters)
+    # Use "http" for insecure kubelet endpoints (requires kubelet to be configured with --read-only-port)
+    scheme: "https"
 
   ## Prometheus service
   ## templates/prometheus-service.yaml