Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
6768ab1
feat(charts): test sync nodes
teoching0705 Feb 24, 2026
eb59e10
feat(charts): test kube-state-metrics
teoching0705 Feb 24, 2026
817c390
feat(charts): test metrics server integrations
teoching0705 Feb 24, 2026
b3ce43b
feat(charts): fix linting
teoching0705 Feb 24, 2026
ada36b7
feat(charts): etcd pin version and typo
teoching0705 Feb 24, 2026
b8c4b62
feat(charts): specify tag
teoching0705 Feb 25, 2026
f3c3be0
feat(charts): test otel collector
teoching0705 Feb 25, 2026
11b63dd
feat(charts): test otel collector change role
teoching0705 Feb 25, 2026
75b0955
feat(charts): test otel kube state metrics
teoching0705 Feb 25, 2026
f24a561
feat(charts): test otel regex
teoching0705 Feb 25, 2026
5720c9f
feat(charts): test otel extra job
teoching0705 Feb 25, 2026
5edb7e3
feat(charts): test otel drop resource_to_telemetry_conversion
teoching0705 Feb 25, 2026
42fdfc2
feat(charts): revert kube state metrics
teoching0705 Feb 25, 2026
7bb8ae6
feat(charts): test extra resources
teoching0705 Feb 25, 2026
7b388d8
Revert "feat(charts): test extra resources"
teoching0705 Feb 25, 2026
1ccb748
revert resource to telemetry conversion
teoching0705 Feb 26, 2026
0513119
feat(charts): remove kube-state-metrics
teoching0705 Feb 26, 2026
53745db
feat(charts): test agent
teoching0705 Feb 26, 2026
3be2c02
feat(charts): fix role and typo
teoching0705 Feb 26, 2026
07986a0
feat(charts): test otel chart.yaml
teoching0705 Feb 26, 2026
962c0e0
feat(charts): test helm update
teoching0705 Feb 26, 2026
f1ee036
feat(charts): test disable otlp and otlp http for agent
teoching0705 Feb 26, 2026
b77c7c0
feat(charts): test disable otlp and otlp http for agent
teoching0705 Feb 26, 2026
aa97984
feat(charts): fix typo metric
teoching0705 Feb 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions charts/otel-collector/Chart.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,8 @@ dependencies:
- name: opentelemetry-collector
repository: https://open-telemetry.github.io/opentelemetry-helm-charts
version: 0.114.0
digest: sha256:f16aaab229e47fe11246ae1df02285c7ab672952af5b2ebab675c492aa65c63d
generated: "2025-08-04T15:28:37.439059528+01:00"
- name: opentelemetry-collector
repository: https://open-telemetry.github.io/opentelemetry-helm-charts
version: 0.114.0
digest: sha256:ea718a2fe7e745fe1bc5523687d0b78f54e13d6036411cb8828916f690a10485
generated: "2026-02-26T16:04:05.616035792Z"
6 changes: 5 additions & 1 deletion charts/otel-collector/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@ apiVersion: v2
name: otel-collector
description: Workflows otel-collector
type: application
version: 0.1.2
version: 0.1.3
dependencies:
- name: opentelemetry-collector
repository: https://open-telemetry.github.io/opentelemetry-helm-charts
version: 0.114.0
- name: opentelemetry-collector
alias: opentelemetry-collector-agent
repository: https://open-telemetry.github.io/opentelemetry-helm-charts
version: 0.114.0
12 changes: 12 additions & 0 deletions charts/otel-collector/templates/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,15 @@ rules:
- apiGroups: [""]
resources: ["pods", "services", "endpoints"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["nodes", "namespaces", "replicationcontrollers", "resourcequotas"]
verbs: ["list", "watch"]
- apiGroups: ["apps"]
resources: ["deployments", "replicasets", "statefulsets", "daemonsets"]
verbs: ["list", "watch"]
- apiGroups: ["autoscaling"]
resources: ["horizontalpodautoscalers"]
verbs: ["list", "watch"]
- apiGroups: [""]
resources: ["nodes/stats", "nodes/proxy", "nodes/metrics"]
verbs: ["get"]
3 changes: 3 additions & 0 deletions charts/otel-collector/templates/rolebinding.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ subjects:
- kind: ServiceAccount
name: "{{ .Release.Name }}-opentelemetry-collector"
namespace: {{ .Release.Namespace }}
- kind: ServiceAccount
name: "{{ .Release.Name }}-opentelemetry-collector-agent"
namespace: {{ .Release.Namespace }}
roleRef:
kind: ClusterRole
name: opentelemetry-collector
Expand Down
98 changes: 98 additions & 0 deletions charts/otel-collector/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ opentelemetry-collector:
presets:
kubernetesAttributes:
enabled: true
clusterMetrics:
enabled: true
ports:
prometheus:
enabled: true
Expand Down Expand Up @@ -70,9 +72,21 @@ opentelemetry-collector:
- sources:
- from: connection
receivers:
k8s_cluster:
node_conditions_to_report: [Ready, MemoryPressure, DiskPressure]
allocatable_types_to_report: [cpu, memory, ephemeral-storage, pods]
collection_interval: 30s
metrics:
k8s.node.condition:
enabled: true
k8s.pod.status_reason:
enabled: true
prometheus:
config:
scrape_configs:
# - job_name: 'kube-state-metrics'
# static_configs:
# - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080']
- job_name: 'kubernetes-pods'
# scheme: https
# tls_config:
Expand Down Expand Up @@ -145,6 +159,7 @@ opentelemetry-collector:
pipelines:
metrics:
receivers:
- k8s_cluster
- prometheus
- otlp
processors:
Expand All @@ -162,3 +177,86 @@ opentelemetry-collector:
- k8sattributes
exporters:
- otlphttp

opentelemetry-collector-agent:
enabled: true
image:
repository: otel/opentelemetry-collector-contrib
mode: daemonset
presets:
kubeletMetrics:
enabled: true
kubernetesAttributes:
enabled: true
resources:
requests:
cpu: '200m'
memory: 256Mi
limits:
cpu: '500m'
memory: 512Mi
ports:
otlp:
enabled: false
otlp-http:
enabled: false
prometheus:
enabled: true
containerPort: 9090
servicePort: 9090
protocol: TCP
jaeger-compact:
enabled: false
jaeger-thrift:
enabled: false
jaeger-grpc:
enabled: false
zipkin:
enabled: false
config:
processors:
batch:
send_batch_size: 256
k8sattributes:
auth_type: 'serviceAccount'
extract:
metadata:
- k8s.namespace.name
- k8s.pod.name
- k8s.pod.uid
- k8s.node.name
pod_association:
- sources:
- from: resource_attribute
name: k8s.pod.ip
- sources:
- from: resource_attribute
name: k8s.pod.uid
- sources:
- from: connection
receivers:
kubeletstats:
collection_interval: 20s
auth_type: serviceAccount
endpoint: "https://${env:K8S_NODE_IP}:10250"
insecure_skip_verify: true
metric_groups:
- node
- pod
- container
exporters:
prometheus:
endpoint: 0.0.0.0:9090
resource_to_telemetry_conversion:
enabled: true
service:
pipelines:
metrics:
receivers:
- kubeletstats
processors:
- k8sattributes
- memory_limiter
- batch
exporters:
- prometheus
8 changes: 8 additions & 0 deletions charts/workflows-cluster/staging-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ vcluster:
selfHeal: true
sync:
fromHost:
nodes:
enabled: true
secrets:
mappings:
byName:
Expand All @@ -73,6 +75,12 @@ vcluster:
"/postgres-application-passwords": "workflows/postgres-application-passwords"
"/postgres-initdb-script": "workflows/postgres-initdb-script"

integrations:
metricsServer:
enabled: true
nodes: true
pods: true

ingress:
secretName: letsencrypt-kubernetes-staging-workflows-diamond-ac-uk
host: kubernetes.staging.workflows.diamond.ac.uk
Expand Down
2 changes: 2 additions & 0 deletions charts/workflows-cluster/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ vcluster:
deploy:
enabled: true
statefulSet:
image:
tag: 3.6.4-0
extraArgs:
- --quota-backend-bytes=8589934592 # 8Gi
resources:
Expand Down
Loading