From 6768ab15096cc17348d2169deaed55dd5a7406a3 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Tue, 24 Feb 2026 10:26:55 +0000 Subject: [PATCH 01/24] feat(charts): test sync nodes --- charts/workflows-cluster/staging-values.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/charts/workflows-cluster/staging-values.yaml b/charts/workflows-cluster/staging-values.yaml index c48fa0463..51329e851 100644 --- a/charts/workflows-cluster/staging-values.yaml +++ b/charts/workflows-cluster/staging-values.yaml @@ -58,6 +58,8 @@ vcluster: selfHeal: true sync: fromHost: + nodes: + enabled: true secrets: mappings: byName: From eb59e109f4ccc30355d555c0a8809e8bd3a1d19b Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Tue, 24 Feb 2026 11:00:39 +0000 Subject: [PATCH 02/24] feat(charts): test kube-state-metrics --- charts/workflows-cluster/staging-values.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/charts/workflows-cluster/staging-values.yaml b/charts/workflows-cluster/staging-values.yaml index 51329e851..26e9e7d56 100644 --- a/charts/workflows-cluster/staging-values.yaml +++ b/charts/workflows-cluster/staging-values.yaml @@ -25,6 +25,13 @@ vcluster: name: argo-cd version: 7.7.22 repo: https://argoproj.github.io/argo-helm + - release: + name: kube-state-metrics + namespace: kube-system + chart: + name: kube-state-metrics + version: 5.28.0 + repo: https:/prometheus-community.github.io/helm-charts - release: name: verflixt namespace: argocd From 817c3909b96bfc0a792a6c5947aca1b466b367ec Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Tue, 24 Feb 2026 11:01:14 +0000 Subject: [PATCH 03/24] feat(charts): test metrics server integrations --- charts/workflows-cluster/staging-values.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/charts/workflows-cluster/staging-values.yaml b/charts/workflows-cluster/staging-values.yaml index 26e9e7d56..127289367 100644 --- a/charts/workflows-cluster/staging-values.yaml +++ b/charts/workflows-cluster/staging-values.yaml @@ -82,6 +82,14 @@ vcluster: "/postgres-application-passwords": "workflows/postgres-application-passwords" "/postgres-initdb-script": "workflows/postgres-initdb-script" + integrations: + metricsServer: + enabled: true + nodes: + enabled: true + pods: + enabled: true + ingress: secretName: letsencrypt-kubernetes-staging-workflows-diamond-ac-uk host: kubernetes.staging.workflows.diamond.ac.uk From b3ce43b38148f424ea32dd019b15c472c2ea96f1 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Tue, 24 Feb 2026 14:18:05 +0000 Subject: [PATCH 04/24] feat(charts): fix linting --- charts/workflows-cluster/staging-values.yaml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/charts/workflows-cluster/staging-values.yaml b/charts/workflows-cluster/staging-values.yaml index 127289367..5f0309d6b 100644 --- a/charts/workflows-cluster/staging-values.yaml +++ b/charts/workflows-cluster/staging-values.yaml @@ -30,7 +30,7 @@ vcluster: namespace: kube-system chart: name: kube-state-metrics - version: 5.28.0 + version: 7.1.0 repo: https:/prometheus-community.github.io/helm-charts - release: name: verflixt @@ -85,10 +85,8 @@ vcluster: integrations: metricsServer: enabled: true - nodes: - enabled: true - pods: - enabled: true + nodes: true + pods: true ingress: secretName: letsencrypt-kubernetes-staging-workflows-diamond-ac-uk From ada36b74d41849fc044ad182a37119afa663b578 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Tue, 24 Feb 2026 16:23:07 +0000 Subject: [PATCH 05/24] feat(charts): etcd pin version and typo --- charts/workflows-cluster/staging-values.yaml | 2 +- charts/workflows-cluster/values.yaml | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/charts/workflows-cluster/staging-values.yaml b/charts/workflows-cluster/staging-values.yaml index 5f0309d6b..5f1ce0af6 100644 --- a/charts/workflows-cluster/staging-values.yaml +++ b/charts/workflows-cluster/staging-values.yaml @@ -31,7 +31,7 @@ vcluster: chart: name: kube-state-metrics version: 7.1.0 - repo: https:/prometheus-community.github.io/helm-charts + repo: https://prometheus-community.github.io/helm-charts - release: name: verflixt namespace: argocd diff --git a/charts/workflows-cluster/values.yaml b/charts/workflows-cluster/values.yaml index 67f43c791..25dfad25f 100644 --- a/charts/workflows-cluster/values.yaml +++ b/charts/workflows-cluster/values.yaml @@ -17,6 +17,9 @@ vcluster: deploy: enabled: true statefulSet: + image: + repository: registry.k8s.io/etcd + tag: "3.6.8" extraArgs: - --quota-backend-bytes=8589934592 # 8Gi resources: From b8c4b6214dfbbd4463ac4587facb8e4908a1eaa3 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Wed, 25 Feb 2026 09:53:29 +0000 Subject: [PATCH 06/24] feat(charts): specify tag --- charts/workflows-cluster/values.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/charts/workflows-cluster/values.yaml b/charts/workflows-cluster/values.yaml index 25dfad25f..8baa1e56a 100644 --- a/charts/workflows-cluster/values.yaml +++ b/charts/workflows-cluster/values.yaml @@ -18,8 +18,7 @@ vcluster: enabled: true statefulSet: image: - repository: registry.k8s.io/etcd - tag: "3.6.8" + tag: 3.6.4-0 extraArgs: - --quota-backend-bytes=8589934592 # 8Gi resources: From f3c3be047ac61dd987bc88b7daba5710fde3bb0f Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Wed, 25 Feb 2026 10:05:56 +0000 Subject: [PATCH 07/24] feat(charts): test otel collector --- charts/otel-collector/values.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/charts/otel-collector/values.yaml b/charts/otel-collector/values.yaml index 51bc953d8..42014f1bd 100644 --- a/charts/otel-collector/values.yaml +++ b/charts/otel-collector/values.yaml @@ -70,6 +70,15 @@ opentelemetry-collector: - sources: - from: connection receivers: + k8s_cluster: + node_conditions_to_report: [Ready, MemoryPressure, DiskPressure] + allocatable_types_to_report: [cpu, memory, ephemeral-stroage, pods] + collection_interval: 30s + metrics: + k8s.node.condition: + enabled: true + k8s.pod.status_reason: + enabled: true prometheus: config: scrape_configs: @@ -145,6 +154,7 @@ opentelemetry-collector: pipelines: metrics: receivers: + - k8s_cluster - prometheus - otlp processors: From 11b63dd814d25842432866180a9ab3eee87c6f14 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Wed, 25 Feb 2026 10:31:00 +0000 Subject: [PATCH 08/24] feat(charts): test otel collector change role --- charts/otel-collector/templates/role.yaml | 9 +++++++++ charts/otel-collector/values.yaml | 4 +++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/charts/otel-collector/templates/role.yaml b/charts/otel-collector/templates/role.yaml index a63489c4a..f7bd013b1 100644 --- a/charts/otel-collector/templates/role.yaml +++ b/charts/otel-collector/templates/role.yaml @@ -7,3 +7,12 @@ rules: - apiGroups: [""] resources: ["pods", "services", "endpoints"] verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["nodes", "namespaces", "replicationcontrollers", "resourcequotas"] + verbs: ["list", "watch"] + - apiGroups: ["apps"] + resources: ["deployments", "replicasets", "statefulsets", "daemonsets"] + verbs: ["list", "watch"] + - apiGroups: ["autoscaling"] + resources: ["horizontalpodautocalers"] + verbs: ["list", "watch"] diff --git a/charts/otel-collector/values.yaml b/charts/otel-collector/values.yaml index 42014f1bd..7c336867d 100644 --- a/charts/otel-collector/values.yaml +++ b/charts/otel-collector/values.yaml @@ -6,6 +6,8 @@ opentelemetry-collector: presets: kubernetesAttributes: enabled: true + clusterMetrics: + enabled: true ports: prometheus: enabled: true @@ -72,7 +74,7 @@ opentelemetry-collector: receivers: k8s_cluster: node_conditions_to_report: [Ready, MemoryPressure, DiskPressure] - allocatable_types_to_report: [cpu, memory, ephemeral-stroage, pods] + allocatable_types_to_report: [cpu, memory, ephemeral-storage, pods] collection_interval: 30s metrics: k8s.node.condition: From 75b09553384318f59c06a1e9969fca95f4de5553 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Wed, 25 Feb 2026 13:24:57 +0000 Subject: [PATCH 09/24] feat(charts): test otel kube state metrics --- charts/otel-collector/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/otel-collector/values.yaml b/charts/otel-collector/values.yaml index 7c336867d..d0dd17600 100644 --- a/charts/otel-collector/values.yaml +++ b/charts/otel-collector/values.yaml @@ -97,8 +97,8 @@ opentelemetry-collector: - source_labels: [__meta_kubernetes_namespace] regex: monitoring action: drop - - source_labels: [__meta_kubernetes_namespace] - regex: kube-system + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_label_app_kubernetes_io_name] + regex: "kube-system;(?!kube-state-metrics$).*" action: drop - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] action: keep From f24a561b723df314b2e4ded3d1a3b707ccafe07e Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Wed, 25 Feb 2026 14:15:18 +0000 Subject: [PATCH 10/24] feat(charts): test otel regex --- charts/otel-collector/values.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/charts/otel-collector/values.yaml b/charts/otel-collector/values.yaml index d0dd17600..f3c67ab60 100644 --- a/charts/otel-collector/values.yaml +++ b/charts/otel-collector/values.yaml @@ -98,7 +98,11 @@ opentelemetry-collector: regex: monitoring action: drop - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_label_app_kubernetes_io_name] - regex: "kube-system;(?!kube-state-metrics$).*" + regex: "kube-system;kube-state-metrics" + target_label: __tmp_keep + replacement: "true" + - source_labels: [__meta_kubernetes_namespace, _tmp_keep] + regex: kube-system action: drop - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] action: keep From 5720c9f6e0b8c76611a5bb64b6cc8cbbde8f182a Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Wed, 25 Feb 2026 14:35:46 +0000 Subject: [PATCH 11/24] feat(charts): test otel extra job --- charts/otel-collector/values.yaml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/charts/otel-collector/values.yaml b/charts/otel-collector/values.yaml index f3c67ab60..7f02362d0 100644 --- a/charts/otel-collector/values.yaml +++ b/charts/otel-collector/values.yaml @@ -84,6 +84,9 @@ opentelemetry-collector: prometheus: config: scrape_configs: + - job_name: 'kube-state-metrics' + static_configs: + - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080'] - job_name: 'kubernetes-pods' # scheme: https # tls_config: @@ -97,11 +100,7 @@ opentelemetry-collector: - source_labels: [__meta_kubernetes_namespace] regex: monitoring action: drop - - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_label_app_kubernetes_io_name] - regex: "kube-system;kube-state-metrics" - target_label: __tmp_keep - replacement: "true" - - source_labels: [__meta_kubernetes_namespace, _tmp_keep] + - source_labels: [__meta_kubernetes_namespace] regex: kube-system action: drop - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] From 5edb7e3ec097770b922fe12668181bb449ea49a0 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Wed, 25 Feb 2026 15:53:35 +0000 Subject: [PATCH 12/24] feat(charts): test otel drop resource_to_telemetry_conversion --- charts/otel-collector/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/otel-collector/values.yaml b/charts/otel-collector/values.yaml index 7f02362d0..05090e4e8 100644 --- a/charts/otel-collector/values.yaml +++ b/charts/otel-collector/values.yaml @@ -146,7 +146,7 @@ opentelemetry-collector: prometheus: endpoint: 0.0.0.0:9090 resource_to_telemetry_conversion: - enabled: true + enabled: false otlphttp: endpoint: "https://otel.tracing.diamond.ac.uk:4318" timeout: 30s From 42fdfc2f1bc11d12c63e83a203e5021ae8cadf83 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Wed, 25 Feb 2026 16:16:44 +0000 Subject: [PATCH 13/24] feat(charts): revert kube state metrics --- charts/otel-collector/values.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/charts/otel-collector/values.yaml b/charts/otel-collector/values.yaml index 05090e4e8..3c7b2e890 100644 --- a/charts/otel-collector/values.yaml +++ b/charts/otel-collector/values.yaml @@ -84,9 +84,9 @@ opentelemetry-collector: prometheus: config: scrape_configs: - - job_name: 'kube-state-metrics' - static_configs: - - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080'] + # - job_name: 'kube-state-metrics' + # static_configs: + # - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080'] - job_name: 'kubernetes-pods' # scheme: https # tls_config: From 7bb8ae6784846d0aba4d855112ed42a682ea4110 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Wed, 25 Feb 2026 16:38:06 +0000 Subject: [PATCH 14/24] feat(charts): test extra resources --- charts/monitoring/staging-values.yaml | 4 ++-- charts/monitoring/values.yaml | 16 ++++++++-------- charts/otel-collector/values.yaml | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/charts/monitoring/staging-values.yaml b/charts/monitoring/staging-values.yaml index 0f2dc364f..0c343d188 100644 --- a/charts/monitoring/staging-values.yaml +++ b/charts/monitoring/staging-values.yaml @@ -50,8 +50,8 @@ prometheus: extraScrapeConfigs: | - job_name: 'otel-collector' scheme: http - scrape_interval: 15s - scrape_timeout: 15s + scrape_interval: 30s + scrape_timeout: 30s static_configs: - targets: - "otel-collector-opentelemetry-collector-x-monitoring--b8f7839a3b:9090" diff --git a/charts/monitoring/values.yaml b/charts/monitoring/values.yaml index 0e7d0d51d..bb6ea64e0 100644 --- a/charts/monitoring/values.yaml +++ b/charts/monitoring/values.yaml @@ -83,8 +83,8 @@ thanos: enabled: false resources: limits: - cpu: 250m - memory: 2Gi + cpu: '1' + memory: 4Gi networkPolicy: enabled: false queryFrontend: @@ -92,8 +92,8 @@ thanos: enabled: false resources: limits: - cpu: 250m - memory: 2Gi + cpu: '1' + memory: 4Gi networkPolicy: enabled: false storegateway: @@ -102,8 +102,8 @@ thanos: enabled: false resources: limits: - cpu: 250m - memory: 2Gi + cpu: '1' + memory: 4Gi networkPolicy: enabled: false receive: @@ -284,8 +284,8 @@ prometheus: extraScrapeConfigs: | - job_name: 'otel-collector' scheme: https - scrape_interval: 15s - scrape_timeout: 15s + scrape_interval: 30s + scrape_timeout: 25s static_configs: - targets: - "otelcollector.workflows.diamond.ac.uk" diff --git a/charts/otel-collector/values.yaml b/charts/otel-collector/values.yaml index 3c7b2e890..05090e4e8 100644 --- a/charts/otel-collector/values.yaml +++ b/charts/otel-collector/values.yaml @@ -84,9 +84,9 @@ opentelemetry-collector: prometheus: config: scrape_configs: - # - job_name: 'kube-state-metrics' - # static_configs: - # - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080'] + - job_name: 'kube-state-metrics' + static_configs: + - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080'] - job_name: 'kubernetes-pods' # scheme: https # tls_config: From 7b388d87bda383ea9e96ff35cca9e600f4730a8f Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Wed, 25 Feb 2026 17:02:13 +0000 Subject: [PATCH 15/24] Revert "feat(charts): test extra resources" This reverts commit 7bb8ae6784846d0aba4d855112ed42a682ea4110. --- charts/monitoring/staging-values.yaml | 4 ++-- charts/monitoring/values.yaml | 16 ++++++++-------- charts/otel-collector/values.yaml | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/charts/monitoring/staging-values.yaml b/charts/monitoring/staging-values.yaml index 0c343d188..0f2dc364f 100644 --- a/charts/monitoring/staging-values.yaml +++ b/charts/monitoring/staging-values.yaml @@ -50,8 +50,8 @@ prometheus: extraScrapeConfigs: | - job_name: 'otel-collector' scheme: http - scrape_interval: 30s - scrape_timeout: 30s + scrape_interval: 15s + scrape_timeout: 15s static_configs: - targets: - "otel-collector-opentelemetry-collector-x-monitoring--b8f7839a3b:9090" diff --git a/charts/monitoring/values.yaml b/charts/monitoring/values.yaml index bb6ea64e0..0e7d0d51d 100644 --- a/charts/monitoring/values.yaml +++ b/charts/monitoring/values.yaml @@ -83,8 +83,8 @@ thanos: enabled: false resources: limits: - cpu: '1' - memory: 4Gi + cpu: 250m + memory: 2Gi networkPolicy: enabled: false queryFrontend: @@ -92,8 +92,8 @@ thanos: enabled: false resources: limits: - cpu: '1' - memory: 4Gi + cpu: 250m + memory: 2Gi networkPolicy: enabled: false storegateway: @@ -102,8 +102,8 @@ thanos: enabled: false resources: limits: - cpu: '1' - memory: 4Gi + cpu: 250m + memory: 2Gi networkPolicy: enabled: false receive: @@ -284,8 +284,8 @@ prometheus: extraScrapeConfigs: | - job_name: 'otel-collector' scheme: https - scrape_interval: 30s - scrape_timeout: 25s + scrape_interval: 15s + scrape_timeout: 15s static_configs: - targets: - "otelcollector.workflows.diamond.ac.uk" diff --git a/charts/otel-collector/values.yaml b/charts/otel-collector/values.yaml index 05090e4e8..3c7b2e890 100644 --- a/charts/otel-collector/values.yaml +++ b/charts/otel-collector/values.yaml @@ -84,9 +84,9 @@ opentelemetry-collector: prometheus: config: scrape_configs: - - job_name: 'kube-state-metrics' - static_configs: - - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080'] + # - job_name: 'kube-state-metrics' + # static_configs: + # - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080'] - job_name: 'kubernetes-pods' # scheme: https # tls_config: From 1ccb748250a778748a7406b735f17f75f84fb130 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Thu, 26 Feb 2026 10:17:05 +0000 Subject: [PATCH 16/24] revert resource to telemetry conversion --- charts/otel-collector/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/otel-collector/values.yaml b/charts/otel-collector/values.yaml index 3c7b2e890..e7d333c4a 100644 --- a/charts/otel-collector/values.yaml +++ b/charts/otel-collector/values.yaml @@ -146,7 +146,7 @@ opentelemetry-collector: prometheus: endpoint: 0.0.0.0:9090 resource_to_telemetry_conversion: - enabled: false + enabled: true otlphttp: endpoint: "https://otel.tracing.diamond.ac.uk:4318" timeout: 30s From 0513119ac60374174a143ae4ea366b4ebf30114b Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Thu, 26 Feb 2026 12:00:53 +0000 Subject: [PATCH 17/24] feat(charts): remove kube-state-metrics --- charts/workflows-cluster/staging-values.yaml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/charts/workflows-cluster/staging-values.yaml b/charts/workflows-cluster/staging-values.yaml index 5f1ce0af6..81ffe84a3 100644 --- a/charts/workflows-cluster/staging-values.yaml +++ b/charts/workflows-cluster/staging-values.yaml @@ -25,13 +25,6 @@ vcluster: name: argo-cd version: 7.7.22 repo: https://argoproj.github.io/argo-helm - - release: - name: kube-state-metrics - namespace: kube-system - chart: - name: kube-state-metrics - version: 7.1.0 - repo: https://prometheus-community.github.io/helm-charts - release: name: verflixt namespace: argocd From 53745dbce06ef370d67f5d3be5749aaa90713f21 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Thu, 26 Feb 2026 15:34:07 +0000 Subject: [PATCH 18/24] feat(charts): test agent --- charts/otel-collector/values.yaml | 79 +++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/charts/otel-collector/values.yaml b/charts/otel-collector/values.yaml index e7d333c4a..f21616b72 100644 --- a/charts/otel-collector/values.yaml +++ b/charts/otel-collector/values.yaml @@ -177,3 +177,82 @@ opentelemetry-collector: - k8sattributes exporters: - otlphttp + +opentelemetry-collector-agent: + enabled: true + image: + repository: otel/opentelemetry-collector-contrib + mode: daemonset + presets: + kubeletMetrics: + enabled: true + kubernetesAtributes: + enabled: true + resources: + requests: + cpu: '200m' + memory: 256Mi + limits: + cpu: '500m' + memory: 512Mi + ports: + prometheus: + enabled: true + containerPort: 9090 + servicePort: 9090 + protocol: TCP + jaegar-compact: + enabled: false + jaeger-thrift: + enabled: false + jaeger-grpc: + enabled: false + zipkin: + enabled: false + config: + processors: + batch: + send_batch_size: 256 + k8sattributes: + auh_type: 'serviceAccount' + extract: + metadata: + - k8s.namespace.name + - k8s.pod.name + - k8s.pod.uid + - k8s.node.name + pod_association: + - sources: + - from: resource_attribute + name: k8s.pod.ip + - sources: + - from: resource_atribute + name: k8s.pod.uid + - sources: + - from: connection + receivers: + kubeletstats: + collection_interval: 20s + auth_type: serviceAccount + endpoint: "https://${env:K8S_NODE_IP}:10250" + insecure_skip_verify: true + metrics_groups: + - node + - pod + - container + exporters: + prometheus: + endpoint: 0.0.0.0:9090 + resource_to_telemetry_conversion: + enabled: true + service: + pipelines: + metrics: + receivers: + - kubeletstats + processors: + - k8sattributes + - memory_limiter + - batch + exporters: + - prometheus From 3be2c0243db4645d6582640b638ad5411c2bd0e5 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Thu, 26 Feb 2026 15:50:44 +0000 Subject: [PATCH 19/24] feat(charts): fix role and typo --- charts/otel-collector/templates/role.yaml | 5 ++++- charts/otel-collector/templates/rolebinding.yaml | 3 +++ charts/otel-collector/values.yaml | 8 ++++---- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/charts/otel-collector/templates/role.yaml b/charts/otel-collector/templates/role.yaml index f7bd013b1..d8fc06649 100644 --- a/charts/otel-collector/templates/role.yaml +++ b/charts/otel-collector/templates/role.yaml @@ -14,5 +14,8 @@ rules: resources: ["deployments", "replicasets", "statefulsets", "daemonsets"] verbs: ["list", "watch"] - apiGroups: ["autoscaling"] - resources: ["horizontalpodautocalers"] + resources: ["horizontalpodautoscalers"] verbs: ["list", "watch"] + - apiGroups: [""] + resources: ["nodes/stats", "nodes/proxy", "nodes/metrics"] + verbs: ["get"] diff --git a/charts/otel-collector/templates/rolebinding.yaml b/charts/otel-collector/templates/rolebinding.yaml index 6d7fc65e3..442492325 100644 --- a/charts/otel-collector/templates/rolebinding.yaml +++ b/charts/otel-collector/templates/rolebinding.yaml @@ -7,6 +7,9 @@ subjects: - kind: ServiceAccount name: "{{ .Release.Name }}-opentelemetry-collector" namespace: {{ .Release.Namespace }} + - kind: ServiceAccount + name: "{{ .Release.Name }}-opentelemetry-collector-agent" + namespace: {{ .Release.Namespace }} roleRef: kind: ClusterRole name: opentelemetry-collector diff --git a/charts/otel-collector/values.yaml b/charts/otel-collector/values.yaml index f21616b72..94d5576d5 100644 --- a/charts/otel-collector/values.yaml +++ b/charts/otel-collector/values.yaml @@ -186,7 +186,7 @@ opentelemetry-collector-agent: presets: kubeletMetrics: enabled: true - kubernetesAtributes: + kubernetesAttributes: enabled: true resources: requests: @@ -201,7 +201,7 @@ opentelemetry-collector-agent: containerPort: 9090 servicePort: 9090 protocol: TCP - jaegar-compact: + jaeger-compact: enabled: false jaeger-thrift: enabled: false @@ -214,7 +214,7 @@ opentelemetry-collector-agent: batch: send_batch_size: 256 k8sattributes: - auh_type: 'serviceAccount' + auth_type: 'serviceAccount' extract: metadata: - k8s.namespace.name @@ -226,7 +226,7 @@ opentelemetry-collector-agent: - from: resource_attribute name: k8s.pod.ip - sources: - - from: resource_atribute + - from: resource_attribute name: k8s.pod.uid - sources: - from: connection From 07986a069b9dea6b65102b39ea57cd9096f958dc Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Thu, 26 Feb 2026 15:59:38 +0000 Subject: [PATCH 20/24] feat(charts): test otel chart.yaml --- charts/otel-collector/Chart.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/charts/otel-collector/Chart.yaml b/charts/otel-collector/Chart.yaml index f6f863c5e..c13c3da34 100644 --- a/charts/otel-collector/Chart.yaml +++ b/charts/otel-collector/Chart.yaml @@ -2,8 +2,12 @@ apiVersion: v2 name: otel-collector description: Workflows otel-collector type: application -version: 0.1.2 +version: 0.1.3 dependencies: - name: opentelemetry-collector repository: https://open-telemetry.github.io/opentelemetry-helm-charts version: 0.114.0 + - name: opentelemetry-collector + alias: opentelemetry-collector-agent + repository: https://open-telemetry.github.io/opentelemetry-helm-charts + version: 0.114.0 From 962c0e0be8d1676c9b69e572fb372cf3a354ae88 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Thu, 26 Feb 2026 16:05:21 +0000 Subject: [PATCH 21/24] feat(charts): test helm update --- charts/otel-collector/Chart.lock | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/charts/otel-collector/Chart.lock b/charts/otel-collector/Chart.lock index 7621da74a..5c3f6b9de 100644 --- a/charts/otel-collector/Chart.lock +++ b/charts/otel-collector/Chart.lock @@ -2,5 +2,8 @@ dependencies: - name: opentelemetry-collector repository: https://open-telemetry.github.io/opentelemetry-helm-charts version: 0.114.0 -digest: sha256:f16aaab229e47fe11246ae1df02285c7ab672952af5b2ebab675c492aa65c63d -generated: "2025-08-04T15:28:37.439059528+01:00" +- name: opentelemetry-collector + repository: https://open-telemetry.github.io/opentelemetry-helm-charts + version: 0.114.0 +digest: sha256:ea718a2fe7e745fe1bc5523687d0b78f54e13d6036411cb8828916f690a10485 +generated: "2026-02-26T16:04:05.616035792Z" From f1ee03696e58de443037ea484e30ca5759181007 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Thu, 26 Feb 2026 16:14:42 +0000 Subject: [PATCH 22/24] feat(charts): test disable otlp and otlp http for agent --- charts/otel-collector/values.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/charts/otel-collector/values.yaml b/charts/otel-collector/values.yaml index 94d5576d5..359f18317 100644 --- a/charts/otel-collector/values.yaml +++ b/charts/otel-collector/values.yaml @@ -196,6 +196,10 @@ opentelemetry-collector-agent: cpu: '500m' memory: 512Mi ports: + otlp: + enabled: false + otlp-http: + enabled: false prometheus: enabled: true containerPort: 9090 From b77c7c0c00e17e9861c1f5302cd4b14dbfa22e4d Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Thu, 26 Feb 2026 16:16:55 +0000 Subject: [PATCH 23/24] feat(charts): test disable otlp and otlp http for agent --- charts/otel-collector/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/otel-collector/values.yaml b/charts/otel-collector/values.yaml index 359f18317..9e809f55a 100644 --- a/charts/otel-collector/values.yaml +++ b/charts/otel-collector/values.yaml @@ -198,8 +198,8 @@ opentelemetry-collector-agent: ports: otlp: enabled: false - otlp-http: - enabled: false + otlp-http: + enabled: false prometheus: enabled: true containerPort: 9090 From aa97984c04df96756f47a1285ba5865951b0a740 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Thu, 26 Feb 2026 16:23:45 +0000 Subject: [PATCH 24/24] feat(charts): fix typo metric --- charts/otel-collector/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/otel-collector/values.yaml b/charts/otel-collector/values.yaml index 9e809f55a..09f559c6f 100644 --- a/charts/otel-collector/values.yaml +++ b/charts/otel-collector/values.yaml @@ -240,7 +240,7 @@ opentelemetry-collector-agent: auth_type: serviceAccount endpoint: "https://${env:K8S_NODE_IP}:10250" insecure_skip_verify: true - metrics_groups: + metric_groups: - node - pod - container