From c11d48902f4eb3ac1f5b408c2b1eb543873b6468 Mon Sep 17 00:00:00 2001
From: stxkxs
Date: Tue, 23 Jun 2026 18:53:39 -0700
Subject: [PATCH] feat(dashboards): add the CloudWatch GrafanaDatasource CR the
 persona boards query
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Eight panels across the agent ops / kill-switch / agentgateway / finance
dashboards set "datasource": "cloudwatch" to graph CloudWatch-native AWS
metrics — AWS/SQS queue depth, AWS/States Step Functions execution health,
AWS/Events rule failures, AWS/Bedrock throttling — none of which have a
Prometheus equivalent. No GrafanaDatasource CR of that type existed, so every
one rendered "datasource not found".

The capability was already in place: the landing-zone AMG workspace enables
the CloudWatch plugin (data_sources includes CLOUDWATCH) and its service role
carries CloudWatch read IAM. Only the datasource instance was missing.

This adds it: type cloudwatch, uid cloudwatch, access proxy, authType
ec2_iam_role (the workspace's ambient role, no static keys), defaultRegion
us-west-2 — constant across environments, so no per-env overlay patch.

The finance board's four athena-cur panels are a separate, larger cross-repo
fix (AMG Athena plugin + service-role IAM + per-env workgroup wiring) tracked
separately.
---
 dashboards/base/datasources/cloudwatch.yaml | 27 +++++++++++++++++++++
 dashboards/base/kustomization.yaml          |  1 +
 2 files changed, 28 insertions(+)
 create mode 100644 dashboards/base/datasources/cloudwatch.yaml

diff --git a/dashboards/base/datasources/cloudwatch.yaml b/dashboards/base/datasources/cloudwatch.yaml
new file mode 100644
index 0000000..1b188e7
--- /dev/null
+++ b/dashboards/base/datasources/cloudwatch.yaml
@@ -0,0 +1,27 @@
+# Amazon CloudWatch data source. The agent persona boards (ops, kill-switch,
+# agentgateway, finance) graph CloudWatch-native AWS metrics that have no
+# Prometheus equivalent — AWS/SQS queue depth, AWS/States Step Functions
+# execution health, AWS/Events rule failures, AWS/Bedrock throttling. These
+# exist in CloudWatch natively (no exporter), so Grafana queries them directly.
+#
+# access=proxy: AMG queries CloudWatch using its workspace IAM role; the
+# landing-zone AMG service role carries CloudWatch read access, and the
+# workspace's data_sources list enables the CloudWatch plugin. authType
+# ec2_iam_role uses that ambient role (no static keys). Region is us-west-2
+# across every environment, so this needs no per-env overlay patch.
+apiVersion: grafana.integreatly.org/v1beta1
+kind: GrafanaDatasource
+metadata:
+  name: cloudwatch
+spec:
+  instanceSelector:
+    matchLabels:
+      dashboards: external
+  datasource:
+    name: CloudWatch
+    uid: cloudwatch
+    type: cloudwatch
+    access: proxy
+    jsonData:
+      authType: ec2_iam_role
+      defaultRegion: us-west-2
diff --git a/dashboards/base/kustomization.yaml b/dashboards/base/kustomization.yaml
index 83a9321..3a4c06f 100644
--- a/dashboards/base/kustomization.yaml
+++ b/dashboards/base/kustomization.yaml
@@ -13,6 +13,7 @@ resources:
   - datasources/prometheus.yaml
   - datasources/loki.yaml
   - datasources/tempo.yaml
+  - datasources/cloudwatch.yaml
   # Grafana-managed SLO / burn-rate alert rules (folder + per-system rule groups),
   # reconciled onto the external Amazon Managed Grafana.
   - alerting/folder.yaml