From ba89a460e6ed87b5a91b995d9cdd7cdc22df0375 Mon Sep 17 00:00:00 2001
From: FedorProshin
Date: Fri, 14 Nov 2025 10:52:44 +0300
Subject: [PATCH 1/4] feat: added backward compatibility in values yaml for alerts, added alerts test using vmalert tool

---
Review note: this patch was hand-edited after export (pull_request branch
filter, curl failure flags, quoted dependency version, trailing newlines).
Hunk line counts are unchanged, but the post-image blob ids on the "index"
lines of alerts-test.yml and kafka-service/Chart.yaml are therefore stale.

 .github/workflows/alerts-test.yml             |   52 +
 operator/charts/helm/kafka-service/Chart.yaml |    7 +
 .../charts/prometheusrules/Chart.yaml         |   24 +
 .../prometheusrules/templates/_helpers.tpl    |  133 +
 .../templates/prometheusrules.yaml            |   52 +
 .../charts/prometheusrules/values.yaml        |    0
 .../templates/prometheus_rules.yaml           |   24 +-
 .../charts/helm/kafka-service/values.yaml     |    1 +
 operator/tests/alerts-tests/rules.yaml        | 9251 +++++++++++++++++
 operator/tests/alerts-tests/test.yaml         |  356 +
 operator/tests/alerts-tests/tests-checker.sh  |   33 +
 11 files changed, 9921 insertions(+), 12 deletions(-)
 create mode 100644 .github/workflows/alerts-test.yml
 create mode 100644 operator/charts/helm/kafka-service/charts/prometheusrules/Chart.yaml
 create mode 100644 operator/charts/helm/kafka-service/charts/prometheusrules/templates/_helpers.tpl
 create mode 100644 operator/charts/helm/kafka-service/charts/prometheusrules/templates/prometheusrules.yaml
 create mode 100644 operator/charts/helm/kafka-service/charts/prometheusrules/values.yaml
 create mode 100644 operator/tests/alerts-tests/rules.yaml
 create mode 100644 operator/tests/alerts-tests/test.yaml
 create mode 100644 operator/tests/alerts-tests/tests-checker.sh

diff --git a/.github/workflows/alerts-test.yml b/.github/workflows/alerts-test.yml
new file mode 100644
index 00000000..ca272ae3
--- /dev/null
+++ b/.github/workflows/alerts-test.yml
@@ -0,0 +1,52 @@
+name: Alerts-test-kafka-operator
+on:
+  workflow_run:
+    workflows: ["Build Artifacts"]
+    types:
+      - completed
+  pull_request:
+    branches:
+      - '**'  # every base branch; a bare "all" only matched a branch literally named "all"
+
+env:
+  max_attempts: 30
+  delay: 10
+
+jobs:
+  Run-Alerts-Test:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Check out repository code
+        uses: actions/checkout@v4
+
+      - name: Check yq version
+        run: yq --version
+
+      - name: Install Helm
+        run: |
+          curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
+
+      - name: Render rules file from helm chart
+        run: |
+          helm template kafka-montemplate ./operator/charts/helm/kafka-service/ > ./operator/tests/alerts-tests/rules.yaml
+          sed -n '/prometheus_rules.yaml/,/---/p' -i ./operator/tests/alerts-tests/rules.yaml
+          sed '1,13d' -i ./operator/tests/alerts-tests/rules.yaml
+
+
+      - name: Check that all necessary tests exists
+        run: |
+          chmod +x ./operator/tests/alerts-tests/tests-checker.sh
+          cd ./operator/tests/alerts-tests/
+          ./tests-checker.sh
+        continue-on-error: true  # missing-test report is advisory; it does not fail the job
+
+      - name: Install vmalert-tool
+        run: |
+          wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.122.4/vmutils-linux-amd64-v1.122.4-enterprise.tar.gz
+          tar -xvf vmutils-linux-amd64-v1.122.4-enterprise.tar.gz
+          chmod +x vmalert-tool-prod
+
+      - name: Run test
+        run: |
+          ./vmalert-tool-prod unittest --files ./operator/tests/alerts-tests/test.yaml
diff --git a/operator/charts/helm/kafka-service/Chart.yaml b/operator/charts/helm/kafka-service/Chart.yaml
index 789999a5..45ad2420 100644
--- a/operator/charts/helm/kafka-service/Chart.yaml
+++ b/operator/charts/helm/kafka-service/Chart.yaml
@@ -19,3 +19,10 @@ version: 1.0.0
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application.
 appVersion: 1.0.0
+
+dependencies:
+  # Prometheus alert rules (bundled subchart, rendered only when monitoring.install is true)
+  - name: prometheusrules
+    condition: monitoring.install
+    version: "~0"
+    repository: "file://charts/prometheusrules"
diff --git a/operator/charts/helm/kafka-service/charts/prometheusrules/Chart.yaml b/operator/charts/helm/kafka-service/charts/prometheusrules/Chart.yaml
new file mode 100644
index 00000000..004fc6f2
--- /dev/null
+++ b/operator/charts/helm/kafka-service/charts/prometheusrules/Chart.yaml
@@ -0,0 +1,24 @@
+apiVersion: v2
+name: prometheusrules
+description: A Helm chart for Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.1.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "1.16.0" diff --git a/operator/charts/helm/kafka-service/charts/prometheusrules/templates/_helpers.tpl b/operator/charts/helm/kafka-service/charts/prometheusrules/templates/_helpers.tpl new file mode 100644 index 00000000..89a8d83b --- /dev/null +++ b/operator/charts/helm/kafka-service/charts/prometheusrules/templates/_helpers.tpl @@ -0,0 +1,133 @@ +{{- define "defaultAlerts" -}} +{{- if and (eq .Values.alertsPackVersion "v2") (.Values.install) }} + - name: {{ .Release.Namespace }}-{{ .Release.Name }} + rules: + - alert: KafkaIsDegradedAlert + annotations: + description: 'Kafka is Degraded' + summary: Some of Kafka Service pods are down + expr: kafka_cluster_status{namespace="{{ .Release.Namespace }}",container="{{ template "kafka.name" . }}-monitoring"} == 6 + for: 3m + labels: + severity: warning + namespace: {{ .Release.Namespace }} + service: {{ .Release.Name }} + - alert: KafkaMetricsAreAbsent + annotations: + description: 'Kafka metrics are absent on {{ .Release.Namespace }}.' + summary: Kafka metrics are absent + expr: absent(kafka_cluster_status{namespace="{{ .Release.Namespace }}"}) == 1 + for: 3m + labels: + severity: warning + namespace: {{ .Release.Namespace }} + service: {{ .Release.Name }} + - alert: KafkaIsDownAlert + annotations: + description: 'Kafka is Down' + summary: All of Kafka Service pods are down + expr: kafka_cluster_status{namespace="{{ .Release.Namespace }}",container="{{ template "kafka.name" . }}-monitoring"} == 10 + for: 3m + labels: + severity: critical + namespace: {{ .Release.Namespace }} + service: {{ .Release.Name }} + - alert: KafkaCPUUsageAlert + annotations: + description: 'Kafka CPU usage is higher than 95 percents' + summary: Some of Kafka Service pods load CPU higher then 95 percents + expr: max(rate(container_cpu_usage_seconds_total{namespace="{{ .Release.Namespace }}",pod=~"{{ template "kafka.name" . 
}}-[0-9].*",container="kafka"}[5m])) / max(kube_pod_container_resource_limits_cpu_cores{exported_namespace="{{ .Release.Namespace }}",exported_pod=~"{{ template "kafka.name" . }}-[0-9].*"}) > 0.95 + for: 3m + labels: + severity: warning + namespace: {{ .Release.Namespace }} + service: {{ .Release.Name }} + - alert: KafkaMemoryUsageAlert + annotations: + description: 'Kafka memory usage is higher than 95 percents' + summary: Some of Kafka Service pods use memory higher then 95 percents + expr: max(container_memory_working_set_bytes{namespace="{{ .Release.Namespace }}",pod=~"{{ template "kafka.name" . }}-[0-9].*",container="kafka"}) / max(kube_pod_container_resource_limits_memory_bytes{exported_namespace="{{ .Release.Namespace }}",exported_pod=~"{{ template "kafka.name" . }}-[0-9].*"}) > 0.95 + for: 3m + labels: + severity: warning + namespace: {{ .Release.Namespace }} + service: {{ .Release.Name }} + - alert: KafkaHeapMemoryUsageAlert + annotations: + description: 'Kafka heap memory usage is higher than 95 percents' + summary: Some of Kafka Service pods use heap memory higher then 95 percents + expr: max(java_Memory_HeapMemoryUsage_used{namespace="{{ .Release.Namespace }}",broker=~"{{ template "kafka.name" . }}-[0-9].*"}) / max(java_Memory_HeapMemoryUsage_max{namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . }}-[0-9].*"}) > 0.95 + for: 3m + labels: + severity: warning + namespace: {{ .Release.Namespace }} + service: {{ .Release.Name }} + - alert: KafkaGCCountAlert + annotations: + description: 'Some of Kafka Service pods have Garbage collections count rate higher than {{ .Values.monitoring.thresholds.gcCountAlert }}' + summary: Some of Kafka Service pods have Garbage collections count rate higher than {{ .Values.monitoring.thresholds.gcCountAlert }} + expr: max(rate(java_GarbageCollector_CollectionCount_total{namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . 
}}-[0-9].*"}[5m])) > {{ .Values.monitoring.thresholds.gcCountAlert }} + for: 3m + labels: + severity: warning + namespace: {{ .Release.Namespace }} + service: {{ .Release.Name }} + - alert: KafkaLagAlert + annotations: + description: 'Some of Kafka Service pods have partition lag higher than {{ .Values.monitoring.thresholds.lagAlert }}' + summary: Some of Kafka Service pods have partition lag higher than {{ .Values.monitoring.thresholds.lagAlert }} + expr: max(kafka_consumergroup_group_lag{namespace="{{ .Release.Namespace }}"}) > {{ .Values.monitoring.thresholds.lagAlert }} + for: 3m + labels: + severity: warning + namespace: {{ .Release.Namespace }} + service: {{ .Release.Name }} + {{- if .Values.monitoring.thresholds.partitionCountAlert }} + - alert: KafkaPartitionCountAlert + annotations: + description: 'Kafka Partition count for {{`{{ $labels.broker }}`}} broker is higher than {{ .Values.monitoring.thresholds.partitionCountAlert }}' + summary: Some of Kafka Partition count is higher than {{ .Values.monitoring.thresholds.partitionCountAlert }} + expr: kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ .Values.monitoring.thresholds.partitionCountAlert }} + for: 3m + labels: + severity: warning + namespace: {{ .Release.Namespace }} + service: {{ .Release.Name }} + {{- end }} + {{- if .Values.monitoring.thresholds.brokerSkewAlert }} + - alert: KafkaBrokerSkewAlert + annotations: + description: 'Kafka Broker Skew for {{`{{ $labels.broker }}`}} broker is higher than {{ .Values.monitoring.thresholds.brokerSkewAlert }}%' + summary: Some of Kafka Broker Skew is higher than {{ .Values.monitoring.thresholds.brokerSkewAlert }}% + expr: (kafka_broker_skew{namespace="{{ .Release.Namespace }}", container="{{ template "kafka.name" . }}-monitoring", broker=~"{{ template "kafka.name" . 
}}-[0-9].*"} > {{ .Values.monitoring.thresholds.brokerSkewAlert }}) and on(broker, namespace) (kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ coalesce .Values.monitoring.thresholds.brokerSkewAlertPartitionCount (include "kafka.replicas" . ) }}) + for: 3m + labels: + severity: warning + namespace: {{ .Release.Namespace }} + service: {{ .Release.Name }} + {{- end }} + {{- if .Values.monitoring.thresholds.brokerLeaderSkewAlert }} + - alert: KafkaBrokerLeaderSkewAlert + annotations: + description: 'Kafka Broker Leader Skew for {{`{{ $labels.broker }}`}} broker is higher than {{ .Values.monitoring.thresholds.brokerLeaderSkewAlert }}%' + summary: Some of Kafka Broker Leader Skew is higher than {{ .Values.monitoring.thresholds.brokerLeaderSkewAlert }}% + expr: (kafka_broker_leader_skew{namespace="{{ .Release.Namespace }}", container="{{ template "kafka.name" . }}-monitoring", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ .Values.monitoring.thresholds.brokerLeaderSkewAlert }}) and on(broker, namespace) (kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ coalesce .Values.monitoring.thresholds.brokerLeaderSkewAlertPartitionCount (include "kafka.replicas" . 
) }}) + for: 3m + labels: + severity: warning + namespace: {{ .Release.Namespace }} + service: {{ .Release.Name }} + {{- end }} + - alert: SupplementaryServicesCompatibilityAlert + annotations: + description: 'Kafka supplementary services in namespace {{`{{ $labels.namespace }}`}} is not compatible with Kafka version {{`{{ $labels.application_version }}`}}' + summary: 'Kafka supplementary services in namespace {{`{{ $labels.namespace }}`}} is not compatible with Kafka version {{`{{ $labels.application_version }}`}}, allowed range is {{`{{ $labels.min_version }}`}} - {{`{{ $labels.max_version }}`}}' + expr: supplementary_services_version_compatible{application="kafka", namespace="{{ .Release.Namespace }}"} != 1 + for: 3m + labels: + severity: warning + namespace: {{ .Release.Namespace }} + service: {{ .Release.Name }} +{{- end }} +{{- end }} + diff --git a/operator/charts/helm/kafka-service/charts/prometheusrules/templates/prometheusrules.yaml b/operator/charts/helm/kafka-service/charts/prometheusrules/templates/prometheusrules.yaml new file mode 100644 index 00000000..b04920bf --- /dev/null +++ b/operator/charts/helm/kafka-service/charts/prometheusrules/templates/prometheusrules.yaml @@ -0,0 +1,52 @@ +{{- if and (eq .Values.alertsPackVersion "v2") (.Values.prometheusMonitoring) }} +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: prometheusrules +spec: + groups: + +{{- $defaultConfig := fromYaml (include "defaultAlerts" . ) -}} +{{- $overrideConfig := .Values.alerts -}} +{{- $finalConfig := merge $overrideConfig $defaultConfig -}} +{{- $alertGroups := .Values.ruleGroups -}} + + +{{- range $defaultGroupName, $defaultGroup := $finalConfig }} +{{- $found := false }} +{{- range $alertGroups }} + {{- if eq $defaultGroupName . 
}} + {{- $found = true }} + {{- end }} +{{- end }} +{{- if $found }} + - name: {{ $defaultGroupName }} + labels: +{{- range $defaultLabelName, $defaultLabelValue := $defaultGroup.labels }} + {{ $defaultLabelName }}: {{ $defaultLabelValue }} +{{- end }} + {{- if $defaultGroup.interval }} + interval: {{ $defaultGroup.interval }} + {{- end }} + {{- if $defaultGroup.concurrency }} + concurrency: {{ $defaultGroup.concurrency }} + {{- end }} + rules: +{{- range $defaultRuleName, $defaultRule := $defaultGroup.rules }} + - alert: {{ $defaultRuleName }} + expr: {{ $defaultRule.expr }} + {{- if $defaultRule.for }} + for: {{ $defaultRule.for }} + {{- end }} + labels: +{{- range $defaultLabelName, $defaultLabelValue := $defaultRule.labels }} + {{ $defaultLabelName }}: {{ $defaultLabelValue }} +{{- end }} + annotations: +{{- range $defaultAnnotationName, $defaultAnnotationValue := $defaultRule.annotations }} + {{ $defaultAnnotationName }}: {{ printf $defaultAnnotationValue | trimAll "\n" | toJson | replace "\\u0026" "&" | replace "\\u003e" ">" | nindent 14 }} +{{- end }} +{{- end }} +{{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/operator/charts/helm/kafka-service/charts/prometheusrules/values.yaml b/operator/charts/helm/kafka-service/charts/prometheusrules/values.yaml new file mode 100644 index 00000000..e69de29b diff --git a/operator/charts/helm/kafka-service/templates/prometheus_rules.yaml b/operator/charts/helm/kafka-service/templates/prometheus_rules.yaml index 58f6e29e..e42c434c 100644 --- a/operator/charts/helm/kafka-service/templates/prometheus_rules.yaml +++ b/operator/charts/helm/kafka-service/templates/prometheus_rules.yaml @@ -1,4 +1,4 @@ -{{- if (and (eq (include "monitoring.install" .) "true") (ne (include "monitoring.type" .) "influxdb") .Values.global.installDashboard (ne (.Values.monitoring.installGrafanaDashboard | toString) "false")) }} +{{- if (and (eq (include "monitoring.install" .) 
"true") (ne .Values.alertsPackVersion "v2") (ne (include "monitoring.type" .) "influxdb") .Values.global.installDashboard (ne (.Values.monitoring.installGrafanaDashboard | toString) "false")) }} apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: @@ -19,7 +19,7 @@ spec: expr: kafka_cluster_status{namespace="{{ .Release.Namespace }}",container="{{ template "kafka.name" . }}-monitoring"} == 6 for: 3m labels: - severity: high + severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} - alert: KafkaMetricsAreAbsent @@ -29,7 +29,7 @@ spec: expr: absent(kafka_cluster_status{namespace="{{ .Release.Namespace }}"}) == 1 for: 3m labels: - severity: high + severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} - alert: KafkaIsDownAlert @@ -49,7 +49,7 @@ spec: expr: max(rate(container_cpu_usage_seconds_total{namespace="{{ .Release.Namespace }}",pod=~"{{ template "kafka.name" . }}-[0-9].*",container="kafka"}[5m])) / max(kube_pod_container_resource_limits_cpu_cores{exported_namespace="{{ .Release.Namespace }}",exported_pod=~"{{ template "kafka.name" . }}-[0-9].*"}) > 0.95 for: 3m labels: - severity: high + severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} - alert: KafkaMemoryUsageAlert @@ -59,7 +59,7 @@ spec: expr: max(container_memory_working_set_bytes{namespace="{{ .Release.Namespace }}",pod=~"{{ template "kafka.name" . }}-[0-9].*",container="kafka"}) / max(kube_pod_container_resource_limits_memory_bytes{exported_namespace="{{ .Release.Namespace }}",exported_pod=~"{{ template "kafka.name" . }}-[0-9].*"}) > 0.95 for: 3m labels: - severity: high + severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} - alert: KafkaHeapMemoryUsageAlert @@ -69,7 +69,7 @@ spec: expr: max(java_Memory_HeapMemoryUsage_used{namespace="{{ .Release.Namespace }}",broker=~"{{ template "kafka.name" . 
}}-[0-9].*"}) / max(java_Memory_HeapMemoryUsage_max{namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . }}-[0-9].*"}) > 0.95 for: 3m labels: - severity: high + severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} - alert: KafkaGCCountAlert @@ -79,7 +79,7 @@ spec: expr: max(rate(java_GarbageCollector_CollectionCount_total{namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . }}-[0-9].*"}[5m])) > {{ .Values.monitoring.thresholds.gcCountAlert }} for: 3m labels: - severity: high + severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} - alert: KafkaLagAlert @@ -89,7 +89,7 @@ spec: expr: max(kafka_consumergroup_group_lag{namespace="{{ .Release.Namespace }}"}) > {{ .Values.monitoring.thresholds.lagAlert }} for: 3m labels: - severity: high + severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} {{- if .Values.monitoring.thresholds.partitionCountAlert }} @@ -100,7 +100,7 @@ spec: expr: kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ .Values.monitoring.thresholds.partitionCountAlert }} for: 3m labels: - severity: high + severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} {{- end }} @@ -112,7 +112,7 @@ spec: expr: (kafka_broker_skew{namespace="{{ .Release.Namespace }}", container="{{ template "kafka.name" . }}-monitoring", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ .Values.monitoring.thresholds.brokerSkewAlert }}) and on(broker, namespace) (kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ coalesce .Values.monitoring.thresholds.brokerSkewAlertPartitionCount (include "kafka.replicas" . 
) }}) for: 3m labels: - severity: high + severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} {{- end }} @@ -124,7 +124,7 @@ spec: expr: (kafka_broker_leader_skew{namespace="{{ .Release.Namespace }}", container="{{ template "kafka.name" . }}-monitoring", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ .Values.monitoring.thresholds.brokerLeaderSkewAlert }}) and on(broker, namespace) (kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ coalesce .Values.monitoring.thresholds.brokerLeaderSkewAlertPartitionCount (include "kafka.replicas" . ) }}) for: 3m labels: - severity: high + severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} {{- end }} @@ -135,7 +135,7 @@ spec: expr: supplementary_services_version_compatible{application="kafka", namespace="{{ .Release.Namespace }}"} != 1 for: 3m labels: - severity: high + severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} {{- end }} diff --git a/operator/charts/helm/kafka-service/values.yaml b/operator/charts/helm/kafka-service/values.yaml index c7d0ee20..4ca740d9 100644 --- a/operator/charts/helm/kafka-service/values.yaml +++ b/operator/charts/helm/kafka-service/values.yaml @@ -196,6 +196,7 @@ kafka: monitoring: install: true + alertsPackVersion: "v1" dockerImage: ghcr.io/netcracker/qubership-kafka-monitoring:main serviceMonitorEnabled: true # affinity: { diff --git a/operator/tests/alerts-tests/rules.yaml b/operator/tests/alerts-tests/rules.yaml new file mode 100644 index 00000000..741050b3 --- /dev/null +++ b/operator/tests/alerts-tests/rules.yaml @@ -0,0 +1,9251 @@ +--- +# Source: kafka-service/templates/operator/service_account.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kafka-service-operator + labels: + app.kubernetes.io/version: '' + app.kubernetes.io/part-of: 'kafka-services' + app.kubernetes.io/managed-by: Helm 
+ app.kubernetes.io/component: 'backend' +--- +# Source: kafka-service/templates/akhq-secret.yaml +apiVersion: v1 +kind: Secret +metadata: + name: akhq-secret + labels: + app.kubernetes.io/version: '' + app.kubernetes.io/part-of: 'kafka-services' + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: 'backend' +type: Opaque +stringData: + akhq_default_user: "" + akhq_default_password: "" +--- +# Source: kafka-service/templates/kafka-monitoring-secret.yaml +apiVersion: v1 +kind: Secret +metadata: + name: kafka-monitoring-secret + labels: + app.kubernetes.io/version: '' + app.kubernetes.io/part-of: 'kafka-services' + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: 'backend' +type: Opaque +stringData: + prometheus-username: "" + prometheus-password: "" +--- +# Source: kafka-service/templates/kafka-services-secret.yaml +apiVersion: v1 +kind: Secret +metadata: + name: kafka-services-secret + labels: + automation.infra/secret-change: "true" + app.kubernetes.io/version: '' + app.kubernetes.io/part-of: 'kafka-services' + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: 'backend' + annotations: + kafkaservice.netcracker.com/auto-restart: "true" +type: Opaque +stringData: + admin-username: "" + admin-password: "" + client-username: "" + client-password: "" +--- +# Source: kafka-service/templates/kafka-monitoring-configuration.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: kafka-monitoring-configuration + labels: + app.kubernetes.io/version: '' + app.kubernetes.io/part-of: 'kafka-services' + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: 'backend' + name: kafka-monitoring + component: kafka-monitoring +data: + config: |- + # Telegraf Configuration + # + # Telegraf is entirely plugin driven. All metrics are gathered from the + # declared inputs, and sent to the declared outputs. + # + # Plugins must be declared in here to be active. + # To deactivate a plugin, comment out the name and any variables. 
+ # + # Use 'telegraf -config telegraf.conf -test' to see what metrics a config + # file would generate. + # + # Environment variables can be used anywhere in this config file, simply prepend + # them with $. For strings the variable must be within quotes (ie, "$STR_VAR"), + # for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR) + + + # Global tags can be specified here in key="value" format. + [global_tags] + # dc = "us-east-1" # will tag all metrics with dc=us-east-1 + # rack = "1a" + ## Environment variables can be used as tags, and throughout the config file + # user = "$USER" + project_name= "$OS_PROJECT" + + + # Configuration for telegraf agent + [agent] + ## Default data collection interval for all inputs + interval = "$DATA_COLLECTION_INTERVAL" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + ## Telegraf will send metrics to outputs in batches of at most + ## metric_batch_size metrics. + ## This controls the size of writes that Telegraf sends to output plugins. + metric_batch_size = 1000 + + ## For failed writes, telegraf will cache metric_buffer_limit metrics for each + ## output, and will flush this buffer on a successful write. Oldest metrics + ## are dropped first when this buffer fills. + ## This buffer only fills when writes fail to output plugin(s). + metric_buffer_limit = 10000 + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. + ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. + collection_jitter = "0s" + + ## Default flushing interval for all outputs. You shouldn't set this below + ## interval. Maximum flush_interval will be flush_interval + flush_jitter + flush_interval = "10s" + ## Jitter the flush interval by a random amount. 
This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. + ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + ## By default, precision will be set to the same timestamp order as the + ## collection interval, with the maximum being 1s. + ## Precision will NOT be used for service inputs, such as logparser and statsd. + ## Valid values are "ns", "us" (or "µs"), "ms", "s". + precision = "" + + ## Logging configuration: + ## Run telegraf with debug log messages. + debug = true + ## Run telegraf in quiet mode (error log messages only). + quiet = false + ## Specify the log file name. The empty string means to log to stderr. + logfile = "" + + ## Override default hostname, if empty use os.Hostname() + hostname = "" + ## If set to true, do no set the "host" tag in the telegraf agent. + omit_hostname = false + + + ############################################################################### + # OUTPUT PLUGINS # + ############################################################################### + # Publish all metrics to /metrics for Prometheus to scrape + [[outputs.prometheus_client]] + ## Address to listen on. + listen = ":8096" + + ## Metric version controls the mapping from Telegraf metrics into + ## Prometheus format. When using the prometheus input, use the same value in + ## both plugins to ensure metrics are round-tripped without modification. + ## + ## example: metric_version = 1; deprecated in 1.13 + ## metric_version = 2; recommended version + # metric_version = 1 + + ## Use HTTP Basic Authentication. + basic_username = "$PROMETHEUS_USERNAME" + basic_password = "$PROMETHEUS_PASSWORD" + ## If set, the IP Ranges which are allowed to access metrics. + ## ex: ip_range = ["192.168.0.0/24", "192.168.1.0/30"] + # ip_range = [] + + ## Path to publish the metrics on. + # path = "/metrics" + + ## Expiration interval for each metric. 
0 == no expiration + # expiration_interval = "60s" + + ## Collectors to enable, valid entries are "gocollector" and "process". + ## If unset, both are enabled. + # collectors_exclude = ["gocollector", "process"] + + ## Send string metrics as Prometheus labels. + ## Unless set to false all string metrics will be sent as labels. + # string_as_label = true + + ## If set, enable TLS with the given certificate. + # tls_cert = "/etc/ssl/telegraf.crt" + # tls_key = "/etc/ssl/telegraf.key" + + ## Set one or more allowed client CA certificate file names to + ## enable mutually authenticated TLS connections + # tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"] + + ## Export metric collection time. + # export_timestamp = false + ############################################################################### + # PROCESSOR PLUGINS # + ############################################################################### + + # # Print all metrics that pass through this filter. + # [[processors.printer]] + + + ############################################################################### + # AGGREGATOR PLUGINS # + ############################################################################### + + # # Keep the aggregate min/max of each metric passing through. + # [[aggregators.minmax]] + # ## General Aggregator Arguments: + # ## The period on which to flush & clear the aggregator. + # period = "30s" + # ## If true, the original metric will be dropped by the + # ## aggregator and will not get sent to the output plugins. 
+ # drop_original = false + + + ############################################################################### + # INPUT PLUGINS # + ############################################################################### + + # Read metrics from one or more commands that can output to stdout + [[inputs.exec]] + ## Commands array + commands = [ + "python3 /opt/kafka-monitoring/exec-scripts/kafka_metric.py", + "/additional-metrics" + ] + + ## Timeout for each command to complete. + timeout = "$KAFKA_EXEC_PLUGIN_TIMEOUT" + + ## Data format to consume. + ## Each data format has it's own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "influx" + + ############################################################################### + # SERVICE INPUT PLUGINS # + ############################################################################### +--- +# Source: kafka-service/templates/operator/role.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: kafka-service-operator + labels: + app.kubernetes.io/version: '' + app.kubernetes.io/part-of: 'kafka-services' + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: 'backend' +rules: + - apiGroups: + - apps + resources: + - deployments + - replicasets + - statefulsets + - daemonsets + verbs: + - get + - create + - list + - update + - watch + - patch + - delete + - apiGroups: + - "" + resources: + - pods + - configmaps + - services + - persistentvolumeclaims + - secrets + - serviceaccounts + verbs: + - get + - create + - list + - update + - watch + - patch + - delete + - apiGroups: + - netcracker.com + resources: + - '*' + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +--- +# Source: kafka-service/templates/operator/role_binding.yaml +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: kafka-service-operator + labels: + 
app.kubernetes.io/version: '' + app.kubernetes.io/part-of: 'kafka-services' + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: 'backend' +subjects: + - kind: ServiceAccount + name: kafka-service-operator +roleRef: + kind: Role + name: kafka-service-operator + apiGroup: rbac.authorization.k8s.io +--- +# Source: kafka-service/templates/operator/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: kafka-service-operator + labels: + app.kubernetes.io/version: '' + app.kubernetes.io/part-of: 'kafka-services' + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: 'backend' + app.kubernetes.io/name: kafka-service-operator + name: kafka-service-operator + app.kubernetes.io/instance: kafka-service-operator-default + app.kubernetes.io/technology: go +spec: + replicas: 1 + selector: + matchLabels: + name: kafka-service-operator + strategy: + type: Recreate + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: manager + labels: + name: kafka-service-operator + app.kubernetes.io/name: kafka-service-operator + app.kubernetes.io/technology: "go" + spec: + serviceAccountName: kafka-service-operator + containers: + - name: kafka-service-operator + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + image: ghcr.io/netcracker/qubership-kafka-service-operator:main + command: + - /manager + imagePullPolicy: Always + env: + - name: WATCH_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: KMM_ENABLED + value: "false" + - name: OPERATOR_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: OPERATOR_NAME + value: kafka-service-operator + - name: KMM_CONFIG_RECONCILE_PERIOD_SECONDS + value: "100" + - name: OPERATOR_MODE + value: "kafkaservice" + - name: CLUSTER_NAME + value: kafka + - name: API_GROUP + value: netcracker.com + resources: + requests: + memory: 512Mi + cpu: 100m + limits: + memory: 512Mi + cpu: 100m + livenessProbe: + 
httpGet: + path: /healthz + port: 8081 + scheme: HTTP + initialDelaySeconds: 30 + timeoutSeconds: 5 + periodSeconds: 15 + successThreshold: 1 + failureThreshold: 5 + readinessProbe: + httpGet: + path: /readyz + port: 8081 + scheme: HTTP + initialDelaySeconds: 40 + timeoutSeconds: 15 + periodSeconds: 15 + successThreshold: 1 + failureThreshold: 5 + securityContext: + runAsNonRoot: true + seccompProfile: + type: "RuntimeDefault" +--- +# Source: kafka-service/templates/akhq-ingress.yaml +# Ingress for AKHQ UI +--- +# Source: kafka-service/templates/grafana_dashboard.yaml +apiVersion: integreatly.org/v1alpha1 +kind: GrafanaDashboard +metadata: + name: kafka-grafana-dashboard + labels: + app.kubernetes.io/version: '' + app.kubernetes.io/part-of: 'kafka-services' + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: 'backend' + app: grafana +spec: + name: kafka-dashboard.json + json: > + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Kafka Monitoring", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 436, + "links": [ + { + "icon": "dashboard", + "tags": [], + "targetBlank": true, + "title": "Kubernetes / Pod Resources", + "type": "link", + "url": "/d/uaHNcPvMz/kubernetes-pod-resources?var-namespace=${namespace}" + }, + { + "icon": "dashboard", + "tags": [], + "targetBlank": true, + "title": "Kafka Lag Exporter", + "tooltip": "Open Kafka Lag Exporter dashboard (when corresponding Lag Exporter is enabled). 
", + "type": "link", + "url": "/d/8LW1Yd8ik/kafka-lag-exporter?var-namespace=${namespace}" + }, + { + "icon": "dashboard", + "tags": [], + "targetBlank": true, + "title": "Kafka Topics", + "tooltip": "Open Kafka Topics dashboard", + "type": "link", + "url": "/d/d16460083/kafka-topics?var-namespace=${namespace}" + }, + { + "icon": "dashboard", + "tags": [], + "targetBlank": true, + "title": "Node Details", + "tooltip": "Open Node Details dashboard for external managed Kafka node exporter.", + "type": "link", + "url": "/d/rYdddlPWk/node-details?var-job=kafka-node-exporter" + }, + { + "icon": "dashboard", + "tags": [], + "targetBlank": true, + "title": "Namespace Resources", + "tooltip": "Open Namespace Resources dashboard", + "type": "link", + "url": "/d/EEjW8rVGz/kubernetes-namespace-resources?var-namespace=${namespace}" + }, + { + "asDropdown": false, + "icon": "dashboard", + "includeVars": false, + "keepTime": false, + "tags": [], + "targetBlank": true, + "title": "Backup Daemon Dashboard", + "tooltip": "", + "type": "link", + "url": "/d/9EEzia24k/backup-daemon?var-namespace=${namespace}" + } + ], + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 47, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "refId": "A" + } + ], + "title": "Cluster Overview", + "type": "row" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Current status of Kafka cluster", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "text": "UP" + }, + "6": { + "text": "DEGRADED" + }, + "10": { + "text": "DOWN" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "rgba(81, 167, 44, 0.9)", + "value": null + }, + { + "color": "rgba(199, 132, 42, 0.89)", + "value": 1 + }, + { + "color": "rgba(206, 36, 50, 0.97)", + "value": 7 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 5, + "x": 0, + "y": 1 + }, + "id": 33, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_cluster_status{namespace=\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Cluster Status", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Current size of Kafka cluster", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 5, + "y": 1 + }, + "id": 9, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + 
"datasource": { + "uid": "$datasource" + }, + "expr": "min(kafka_cluster_size{namespace=\"$namespace\", cluster=\"$cluster\"})", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Cluster Size", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Current count of ready pods. May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "mappings": [ + { + "options": { + "-1": { + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 8, + "y": 1 + }, + "id": 102, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(kube_pod_status_ready{exported_namespace=\"$namespace\", cluster=\"$cluster\", exported_pod=~\"$broker.*\", condition=\"true\"}) OR on() vector(-1)", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Ready Pods", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Current state of controller broker. The controller changes state when processing an event. Most of the time, the Idle state is displayed, which means that the controller has processed all the events. 
If the event processing takes a considerable time (several seconds or more), this state of the controller will be displayed. May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "index": 18, + "text": "Idle" + }, + "1": { + "index": 17, + "text": "Controller Change" + }, + "2": { + "index": 16, + "text": "Broker Change" + }, + "3": { + "index": 15, + "text": "Topic Change" + }, + "4": { + "index": 14, + "text": "Topic Deletion" + }, + "5": { + "index": 13, + "text": "Alter Partition Reassignment" + }, + "6": { + "index": 12, + "text": "Auto Leader Balance" + }, + "7": { + "index": 11, + "text": "Manual Leader Balance" + }, + "8": { + "index": 10, + "text": "Controlled Shutdown" + }, + "9": { + "index": 9, + "text": "Isr Change" + }, + "10": { + "index": 8, + "text": "Leader And Isr Response Received" + }, + "11": { + "index": 7, + "text": "Log Dir Change" + }, + "12": { + "index": 6, + "text": "Controller Shutdown" + }, + "13": { + "index": 5, + "text": "Unclean Leader Election Enable" + }, + "14": { + "index": 4, + "text": "Topic Unclean Leader Election Enable" + }, + "15": { + "index": 3, + "text": "List Partition Reassignment" + }, + "16": { + "index": 2, + "text": "Update Metadata Response Received" + }, + "101": { + "index": 19, + "text": "KRaft" + }, + "-1": { + "index": 1, + "text": "Cluster Fail" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "index": 0, + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(204, 204, 220)", + "value": null + }, + { + "color": "#1F60C4", + "value": 0 + }, + { + "color": "#FA6400", + "value": 1 + }, + { + "color": "dark-blue", + "value": 101 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 5, + "x": 11, + "y": 1 + }, + "id": 62, + "interval": "$inter", + 
"maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "max(kafka_controller_KafkaController_Value{name=\"ControllerState\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"})", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "editorMode": "code", + "exemplar": false, + "expr": "kafka_cluster_quorum_mode{namespace=\"$namespace\"} == 101", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "B" + } + ], + "title": "Controller State", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Current controller broker.\nThe first node to boot in a Kafka cluster automatically becomes the controller, and there can be only one. The controller in a Kafka cluster is responsible for maintaining the list of partition leaders, and coordinating leadership transitions (in the event a partition leader becomes unavailable). If it becomes necessary to replace the controller, a new controller is randomly chosen by ZooKeeper from the pool of brokers. 
In general, it is not possible for this value to be greater than one, but you should definitely alert on a value of zero", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "-1": { + "text": "Active controller is not elected" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 24, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "name", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "kafka_controller_KafkaController_Value{name=\"ActiveControllerCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"} > 0", + "instant": true, + "interval": "", + "legendFormat": "{{broker}}", + "refId": "A" + } + ], + "title": "Controller Broker", + "type": "stat" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Kafka version running on controller broker", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 60, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "colorMode": 
"value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "name", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "group by (kafka_version) (kafka_cluster_status{namespace=\"$namespace\",cluster=\"$cluster\"})", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "{{kafka_version}}", + "refId": "B" + } + ], + "title": "Kafka Version", + "type": "stat" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Transitions of Kafka cluster statuses", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "fillOpacity": 100, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineWidth": 0, + "spanNulls": false + }, + "fieldMinMax": false, + "mappings": [ + { + "options": { + "0": { + "color": "semi-dark-green", + "index": 5, + "text": "UP" + }, + "6": { + "color": "semi-dark-orange", + "index": 4, + "text": "DEGRADED" + }, + "10": { + "color": "semi-dark-red", + "index": 3, + "text": "DOWN" + }, + "N/A": { + "color": "#CCC", + "index": 2 + }, + "null": { + "index": 1, + "text": "N/A" + } + }, + "type": "value" + }, + { + "options": { + "from": null, + "result": { + "index": 0, + "text": "N/A" + }, + "to": null + }, + "type": "range" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 0, + "y": 5 + }, + "id": 106, + "options": { + "alignValue": "center", + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "mergeValues": true, 
+ "rowHeight": 0.9, + "showValue": "never", + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(kafka_cluster_status{namespace=\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Cluster State", + "queryType": "randomWalk", + "range": true, + "refId": "A" + } + ], + "title": "Cluster Status Transitions", + "type": "state-timeline" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Transitions of controller broker states. The controller changes state when processing an event. Most of the time, the Idle state is displayed, which means that the controller has processed all the events. If the event processing takes a considerable time (several seconds or more), this state of the controller will be displayed", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "fillOpacity": 100, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineWidth": 0, + "spanNulls": false + }, + "mappings": [ + { + "options": { + "0": { + "color": "dark-blue", + "index": 19, + "text": "Idle" + }, + "1": { + "color": "dark-orange", + "index": 18, + "text": "Controller Change" + }, + "2": { + "color": "dark-orange", + "index": 17, + "text": "Broker Change" + }, + "3": { + "color": "dark-orange", + "index": 16, + "text": "Topic Change" + }, + "4": { + "color": "dark-orange", + "index": 15, + "text": "Topic Deletion" + }, + "5": { + "color": "dark-orange", + "index": 14, + "text": "Alter Partition Reassignment" + }, + "6": { + "color": "dark-orange", + "index": 13, + "text": "Auto Leader Balance" + }, + "7": { + "color": "dark-orange", + "index": 12, + "text": "Manual Leader Balance" + }, + "8": { + "color": "dark-orange", + "index": 11, + "text": 
"Controlled Shutdown" + }, + "9": { + "color": "dark-orange", + "index": 10, + "text": "Isr Change" + }, + "10": { + "color": "dark-orange", + "index": 9, + "text": "Leader And Isr Response Received" + }, + "11": { + "color": "dark-orange", + "index": 8, + "text": "Log Dir Change" + }, + "12": { + "color": "dark-orange", + "index": 7, + "text": "Controller Shutdown" + }, + "13": { + "color": "dark-orange", + "index": 6, + "text": "Unclean Leader Election Enable" + }, + "14": { + "color": "dark-orange", + "index": 5, + "text": "Topic Unclean Leader Election Enable" + }, + "15": { + "color": "dark-orange", + "index": 4, + "text": "List Partition Reassignment" + }, + "16": { + "color": "dark-orange", + "index": 3, + "text": "Update Metadata Response Received" + }, + "-1": { + "color": "dark-red", + "index": 1, + "text": "Cluster Fail" + }, + "null": { + "color": "text", + "index": 2, + "text": "N/A" + } + }, + "type": "value" + }, + { + "options": { + "from": null, + "result": { + "color": "text", + "index": 0, + "text": "N/A" + }, + "to": null + }, + "type": "range" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 12, + "y": 5 + }, + "id": 107, + "options": { + "alignValue": "left", + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "mergeValues": true, + "rowHeight": 0.9, + "showValue": "never", + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "max(kafka_controller_KafkaController_Value{name=\"ControllerState\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"})", + "interval": "", + "legendFormat": "Controller state", + "queryType": "randomWalk", + "range": true, + "refId": "A" 
+ } + ], + "title": "Controller State Transitions", + "type": "state-timeline" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Transitions of readiness probes for each Kafka pod. May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "fillOpacity": 100, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineWidth": 0, + "spanNulls": false + }, + "mappings": [ + { + "options": { + "0": { + "color": "semi-dark-red", + "index": 3, + "text": "FAILURE" + }, + "1": { + "color": "semi-dark-green", + "index": 2, + "text": "SUCCESS" + }, + "null": { + "color": "text", + "index": 1, + "text": "N/A" + } + }, + "type": "value" + }, + { + "options": { + "from": null, + "result": { + "color": "text", + "index": 0, + "text": "N/A" + }, + "to": null + }, + "type": "range" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 98, + "interval": "$inter", + "options": { + "alignValue": "left", + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "mergeValues": true, + "rowHeight": 0.9, + "showValue": "never", + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "min(kube_pod_status_ready{exported_namespace=\"$namespace\", cluster=\"$cluster\", exported_pod=~\"$broker.*\", condition=\"true\"}) by (exported_pod)", + "interval": "", + "legendFormat": "{{exported_pod}}", + "refId": "A" + } + ], + "title": "Pod Readiness Probe Transitions", + "type": "state-timeline" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Information about 
brokers configuration consistency", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 58, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "name", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "kafka_cluster_status{namespace=\"$namespace\", cluster=\"$cluster\"}", + "instant": true, + "interval": "", + "legendFormat": "{{same_configs}}", + "refId": "A" + } + ], + "title": "Similar Configs", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Information about broker states, number of partitions replicas on each broker, number of partitions where current broker is a leader, skews and number of under replicated partitions", + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "decimals": 0, + "displayName": "", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": 
"custom.hidden", + "value": true + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "broker" + }, + "properties": [ + { + "id": "displayName", + "value": "Broker" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "displayName", + "value": "Broker State" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "displayName", + "value": "Partitions" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + }, + { + "id": "custom.align" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 2000 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 4000 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #C" + }, + "properties": [ + { + "id": "displayName", + "value": "Broker Skew" + }, + { + "id": "unit", + "value": "percent" + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + }, + { + "id": "custom.align" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "#C4162A", + "value": null + }, + { + "color": "#56A64B", + "value": -50 + }, + { + "color": "#C4162A", + "value": 50 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #D" + }, + "properties": [ + { + "id": "displayName", + "value": "Partition Leadership" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #E" + }, + "properties": [ 
+ { + "id": "displayName", + "value": "Broker Leader Skew" + }, + { + "id": "unit", + "value": "percent" + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + }, + { + "id": "custom.align" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "#C4162A", + "value": null + }, + { + "color": "#56A64B", + "value": -50 + }, + { + "color": "#C4162A", + "value": 50 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #F" + }, + "properties": [ + { + "id": "displayName", + "value": "Under Replicated Partitions" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + }, + { + "id": "custom.align" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "#FA6400", + "value": null + }, + { + "color": "#56A64B", + "value": 0 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 1 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #G" + }, + "properties": [ + { + "id": "displayName", + "value": "Under Min ISR Partitions" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + }, + { + "id": "custom.align" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "#FA6400", + "value": null + }, + { + "color": "#56A64B", + "value": 0 + }, + { + "color": "#C4162A", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 66, + "interval": "$inter", + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_server_KafkaServer_Value{name=\"BrokerState\", 
namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "max(kafka_server_ReplicaManager_Value{name=\"PartitionCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_broker_skew{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "C" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_server_ReplicaManager_Value{name=\"LeaderCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "D" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_broker_leader_skew{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "E" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_server_ReplicaManager_Value{name=\"UnderReplicatedPartitions\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "F" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_server_ReplicaManager_Value{name=\"UnderMinIsrPartitionCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "G" + } 
+ ], + "timeFrom": "5m", + "title": "Brokers", + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [] + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 164, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "Broker Issues", + "type": "row" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "The number of under-replicated partitions. Replicas that are added as part of a reassignment will not count toward this value", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 168, + "options": { + "legend": { + "calcs": [ + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": 
"kafka_server_ReplicaManager_Value{name=\"UnderReplicatedPartitions\",namespace=\"$namespace\",cluster=\"$cluster\",broker=~\"$broker\"}", + "legendFormat": "{{ broker }}", + "range": true, + "refId": "A" + } + ], + "title": "Under Replicated Partitions", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "The number of partitions whose in-sync replicas (ISR) count is less than `minIsr`", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 169, + "options": { + "legend": { + "calcs": [ + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "kafka_server_ReplicaManager_Value{name=\"UnderMinIsrPartitionCount\",namespace=\"$namespace\",cluster=\"$cluster\",broker=~\"$broker\"}", + "legendFormat": "{{ broker }}", + "range": true, + "refId": "A" + } + ], + "title": "Under Min ISR Partitions", + 
"type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "The number of partitions that don’t have an active leader and are hence not writable or readable", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 26 + }, + "id": 170, + "options": { + "legend": { + "calcs": [ + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "kafka_controller_KafkaController_Value{name=\"OfflinePartitionsCount\",namespace=\"$namespace\",cluster=\"$cluster\",broker=~\"$broker\"}", + "legendFormat": "{{ broker }}", + "range": true, + "refId": "A" + } + ], + "title": "Offline Partitions", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "The number of partitions which have open transactions with durations exceeding `transaction.max.timeout.ms` (plus 5 minutes)", + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 26 + }, + "id": 172, + "options": { + "legend": { + "calcs": [ + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "kafka_server_ReplicaManager_Value{name=\"PartitionsWithLateTransactionsCount\",namespace=\"$namespace\",cluster=\"$cluster\",broker=~\"$broker\"}", + "legendFormat": "{{ broker }}", + "range": true, + "refId": "A" + } + ], + "title": "Partitions With Late Transactions", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "The rate of errors in responses counted per error code. 
If a response contains multiple errors, all are counted", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 171, + "options": { + "legend": { + "calcs": [ + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true, + "width": 390 + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(kafka_network_RequestMetrics_Count_total{name=\"ErrorsPerSec\",error!=\"NONE\",namespace=\"$namespace\",cluster=\"$cluster\",broker=~\"$broker\"}[5m])) by (broker, error)", + "legendFormat": "{{ broker }}: {{error}}", + "range": true, + "refId": "A" + } + ], + "title": "Broker Errors Rate", + "type": "timeseries" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 64, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"$datasource" + }, + "description": "Total number of topics on Kafka cluster. May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "-1": { + "text": "Cluster Fail" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 0, + "y": 41 + }, + "id": 68, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "max(kafka_controller_KafkaController_Value{name=\"GlobalTopicCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Total Topics Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Total number of partitions on Kafka cluster. 
May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "-1": { + "text": "Cluster Fail" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(143, 59, 184, 0)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 100000 + }, + { + "color": "#d44a3a", + "value": 200000 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 8, + "y": 41 + }, + "id": 70, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "max(kafka_controller_KafkaController_Value{name=\"GlobalPartitionCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Total Partitions Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Number of max partitions per broker. 
May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "-1": { + "text": "Cluster Fail" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(143, 59, 184, 0)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 100000 + }, + { + "color": "#d44a3a", + "value": 200000 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 41 + }, + "id": 173, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "max(max(kafka_server_ReplicaManager_Value{name=\"PartitionCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker))", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Max Partitions Per Broker", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "List of topics which have at least one partition without a leader. 
May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 46 + }, + "id": 74, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "count(kafka_cluster_topic_without_leader{namespace=\"$namespace\", cluster=\"$cluster\"}) OR on() vector(0)", + "format": "time_series", + "instant": true, + "legendFormat": "{{topics_without_leader}}", + "refId": "A" + } + ], + "title": "Topics Without Leader", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Number of partitions without an active leader. Because all read and write operations are only performed on partition leaders, a non-zero value for this metric should be alerted on to prevent service interruptions. 
Any partition without an active leader will be completely inaccessible, and both consumers and producers of that partition will be blocked until a leader becomes available", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "-1": { + "text": "Kafka controller is not elected" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#FA6400", + "value": null + }, + { + "color": "rgba(86, 166, 75, 0)", + "value": 0 + }, + { + "color": "#E02F44", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 46 + }, + "id": 26, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_controller_KafkaController_Value{name=\"OfflinePartitionsCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Offline Partitions Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Count of topics which have at least one under replicated partition. 
May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "orange", + "value": null + }, + { + "color": "transparent", + "value": 0 + }, + { + "color": "orange", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 46 + }, + "id": 76, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "count(group(kafka_cluster_Partition_Value{name=\"UnderReplicated\",namespace=\"$namespace\",cluster=\"$cluster\"} > 0) by (topic)) OR on() vector(0)", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Under Replicated Topics", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Count of topics with unclean leader election enabled. Unclean leader elections can lead to data loss, so you should check these topics to see if this setting is set reasonably. 
May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "links": [ + { + "targetBlank": true, + "title": "", + "url": "/d/d16460083/kafka-topics?var-namespace=${namespace}&viewPanel=67" + } + ], + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "orange", + "value": null + }, + { + "color": "transparent", + "value": 0 + }, + { + "color": "orange", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 46 + }, + "id": 104, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "auto", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "count(kafka_cluster_unclean_election_topics{namespace=\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Topics With Unclean Leader Election Enabled", + "type": "stat" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "refId": "A" + } + ], + "title": "Topics", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 41 + }, + "id": 78, + "panels": [ + { + "datasource": { + "uid": "$datasource" + }, + "description": "Number of consumer groups in different states. 
May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "custom": { + "align": "right", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "decimals": 2, + "displayName": "", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.hidden", + "value": true + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "namespace" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.hidden", + "value": true + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "displayName", + "value": "Total" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "displayName", + "value": "Stable" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #C" + }, + "properties": [ + { + "id": "displayName", + "value": "PreparingRebalance" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #D" + }, + "properties": [ + { + "id": "displayName", + "value": "CompletingRebalance" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #E" + }, + "properties": [ + { + "id": "displayName", + "value": "Empty" + }, + { + "id": "unit", + "value": "short" + }, + { + 
"id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #F" + }, + "properties": [ + { + "id": "displayName", + "value": "Dead" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align" + } + ] + } + ] + }, + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 42 + }, + "id": 80, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_coordinator_group_GroupMetadataManager_Value{name=\"NumGroups\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_coordinator_group_GroupMetadataManager_Value{name=\"NumGroupsStable\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_coordinator_group_GroupMetadataManager_Value{name=\"NumGroupsPreparingRebalance\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "C" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_coordinator_group_GroupMetadataManager_Value{name=\"NumGroupsCompletingRebalance\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "D" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": 
"max(kafka_coordinator_group_GroupMetadataManager_Value{name=\"NumGroupsEmpty\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "E" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_coordinator_group_GroupMetadataManager_Value{name=\"NumGroupsDead\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "F" + } + ], + "title": "Consumer Groups Number", + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [] + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "refId": "A" + } + ], + "title": "Consumer Groups", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 42 + }, + "id": 48, + "panels": [ + { + "datasource": { + "uid": "$datasource" + }, + "description": "JVM heap usage by broker. 
May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/limit/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 43 + }, + "id": 1, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(java_Memory_HeapMemoryUsage_used{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", + "interval": "", + "legendFormat": "{{broker}}: used", + "refId": "D" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(java_Memory_HeapMemoryUsage_max{namespace=\"$namespace\", 
cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", + "interval": "", + "legendFormat": "{{broker}}: limit", + "refId": "C" + } + ], + "title": "JVM Heap Usage", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "JVM heap usage by broker in percent (%). May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 43 + }, + "id": 21, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "(max(java_Memory_HeapMemoryUsage_used{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker) / max(java_Memory_HeapMemoryUsage_max{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)) * 100", + "interval": "", + 
"legendFormat": "{{broker}}", + "refId": "A" + } + ], + "title": "JVM Heap Usage in Percent", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Garbage collection time rate per second by broker. May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 50 + }, + "id": 23, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(rate(java_GarbageCollector_CollectionTime_total{name=\"G1 Young Generation\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval])) by (broker) >= 0", + "interval": "", + "legendFormat": "{{broker}}: young", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": 
"max(rate(java_GarbageCollector_CollectionTime_total{name=\"G1 Old Generation\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval])) by (broker) >= 0", + "interval": "", + "legendFormat": "{{broker}}: old", + "refId": "B" + } + ], + "title": "GC Time", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Garbage collections count rate per second by broker. May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 50 + }, + "id": 82, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(rate(java_GarbageCollector_CollectionCount_total{name=\"G1 Young Generation\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval])) by 
(broker) >=0", + "interval": "", + "legendFormat": "{{broker}}: young", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(rate(java_GarbageCollector_CollectionCount_total{name=\"G1 Old Generation\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval])) by (broker) >=0", + "interval": "", + "legendFormat": "{{broker}}: old", + "refId": "B" + } + ], + "title": "GC Count", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "refId": "A" + } + ], + "title": "JVM Heap and GC", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 43 + }, + "id": 49, + "panels": [ + { + "datasource": { + "uid": "$datasource" + }, + "description": "Memory usage by pod. May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/limit/" + }, + "properties": [ + { + "id": "color", 
+ "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 3 + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 44 + }, + "id": 44, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(container_memory_usage_bytes{namespace=\"$namespace\", cluster=\"$cluster\", pod=~\"$broker.*\", container!=\"\"}) by (pod)", + "legendFormat": "{{pod}}: allocated", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", cluster=\"$cluster\", pod=~\"$broker.*\", container!=\"\", image!=\"\"}) by (pod)", + "legendFormat": "{{pod}}: usage", + "refId": "B" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{exported_namespace=\"$namespace\", cluster=\"$cluster\", exported_pod=~\"$broker.*\"}) by (exported_pod)", + "legendFormat": "{{exported_pod}}: limit", + "refId": "C" + } + ], + "title": "Memory Usage", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "refId": "A" + } + ], + "title": "RAM", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 44 + }, + "id": 50, + "panels": [ + { + "datasource": { + "uid": "$datasource" + }, + "description": "CPU usage by pod. 
May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Millicores", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/limit/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 4 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/request/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 1 + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 45 + }, + "id": 46, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": 
"(sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", cluster=\"$cluster\", pod=~\"$broker.*\", container!=\"\", image!=\"\"}[$__interval])) by (pod)) * 1000", + "legendFormat": "{{pod}}: usage", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "(sum(kube_pod_container_resource_limits_cpu_cores{exported_namespace=\"$namespace\", cluster=\"$cluster\", exported_pod=~\"$broker.*\"}) by (exported_pod)) * 1000", + "legendFormat": "{{exported_pod}}: limit", + "refId": "C" + } + ], + "title": "CPU Usage", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "refId": "A" + } + ], + "title": "CPU", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 45 + }, + "id": 51, + "panels": [ + { + "datasource": { + "uid": "$datasource" + }, + "description": "The total size in bytes of read operations on the volume for each broker. 
May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 46 + }, + "id": 31, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "rate(kafka_server_KafkaServer_Value{name=\"linux-disk-read-bytes\",namespace=\"${namespace}\",cluster=\"$cluster\",broker=~\"$broker\"}[5m])", + "legendFormat": "{{ broker }}", + "refId": "A" + } + ], + "title": "Disk Read Bytes", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "The total size in bytes of write operations on the volume for each broker. 
May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 46 + }, + "id": 32, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "rate(kafka_server_KafkaServer_Value{name=\"linux-disk-write-bytes\",namespace=\"${namespace}\",cluster=\"$cluster\",broker=~\"$broker\"}[5m])", + "legendFormat": "{{ broker }}", + "refId": "A" + } + ], + "title": "Disk Write Bytes", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "The total size in bytes of volume space occupied by topic partitions logs for each broker. 
May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 54 + }, + "id": 34, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(kafka_log_Log_Value{name=\"Size\",namespace=\"${namespace}\",cluster=\"$cluster\",broker=~\"$broker\"}) by (broker)", + "legendFormat": "{{ broker }}", + "refId": "A" + } + ], + "title": "Topic Partition Data Size", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "refId": "A" + } + ], + "title": "Disk", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 84, + "panels": [ 
+ { + "datasource": { + "uid": "$datasource" + }, + "description": "Total number of controller requests to be sent out to brokers", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 47 + }, + "id": 86, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_controller_ControllerChannelManager_Value{name=\"TotalQueueSize\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) >= 0", + "interval": "", + "legendFormat": "total_queue_size", + "refId": "A" + } + ], + "title": "Total Queue Size", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Size of ControllerEventManager's queue. Every ControllerEvent has an associated state. 
When a ControllerEvent is processed, it triggers a state transition to the requested state. ControllerEvent events are managed by ControllerEventManager", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 47 + }, + "id": 88, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_controller_ControllerEventManager_Value{name=\"EventQueueSize\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) >= 0", + "interval": "", + "legendFormat": "event_queue_size", + "refId": "A" + } + ], + "title": "Event Queue Size", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Time it takes for any event (except the Idle event) to wait in the ControllerEventManager's queue before being processed", + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 47 + }, + "id": 90, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(idelta(kafka_controller_ControllerEventManager_Count_total{name=\"EventQueueTimeMs\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval]))", + "interval": "", + "legendFormat": "event_queue_time_rate", + "refId": "A" + } + ], + "title": "Event Queue Time Rate", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "refId": "A" + } + ], + "title": "Controller Queues", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 
47 + }, + "id": 52, + "panels": [ + { + "datasource": { + "uid": "$datasource" + }, + "description": "Aggregate incoming/outgoing byte rate per second", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/in/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#3F6833", + "mode": "fixed" + } + }, + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/out/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E5AC0E", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 48 + }, + "id": 15, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"BytesInPerSec\", 
namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[5m])) by (broker) >= 0", + "interval": "", + "legendFormat": "{{broker}}: in", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"BytesOutPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[5m])) by (broker) >= 0", + "interval": "", + "legendFormat": "{{broker}}: out", + "refId": "B" + } + ], + "title": "Bytes In/Out", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Amount of data in bytes per second rejected by broker", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 48 + }, + "id": 17, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": 
"max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"BytesRejectedPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[5m])) by (broker) >= 0", + "interval": "", + "legendFormat": "{{broker}}", + "refId": "A" + } + ], + "title": "Bytes Rejected", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "refId": "A" + } + ], + "title": "Network", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 48 + }, + "id": 92, + "panels": [ + { + "datasource": { + "uid": "$datasource" + }, + "description": "ZooKeeper session state for each broker. May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "custom": { + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "decimals": 2, + "displayName": "", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "broker" + }, + "properties": [ + { + "id": "displayName", + "value": "Broker" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "value" + }, + "properties": [ + { + "id": "displayName", + "value": "Zookeeper Session State" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align" + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 0, + "y": 49 + }, + "id": 94, + "interval": "$inter", + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": 
"kafka_server_SessionExpireListener_Value{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": "5m", + "title": "ZooKeeper Session State", + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [] + } + } + ], + "type": "table" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Rate of ZooKeeper requests. May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 8, + "y": 49 + }, + "id": 96, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "rate(kafka_server_ZooKeeperClientMetrics_Count_total{name=\"ZooKeeperRequestLatencyMs\", 
namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[5m])", + "interval": "", + "legendFormat": "{{broker}}", + "range": true, + "refId": "A" + } + ], + "title": "ZooKeeper Requests Rate", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Latency of ZooKeeper requests. May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 49 + }, + "id": 108, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "kafka_server_ZooKeeperClientMetrics_95thPercentile{name=\"ZooKeeperRequestLatencyMs\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}", + "interval": "", + "legendFormat": "{{broker}}", + "range": true, + "refId": "A" + } + 
], + "title": "ZooKeeper Requests Latency", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "ZooKeeper Connection", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 49 + }, + "id": 53, + "panels": [ + { + "datasource": { + "uid": "$datasource" + }, + "description": "ISR expansion rate. If a broker goes down, ISR for some of the partitions will shrink. When that broker is up again, ISR will be expanded once the replicas are fully caught up. Other than that, the expected value for both ISR shrink rate and expansion rate is 0", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 50 + }, + "id": 12, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": 
"7.3.6", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(idelta(kafka_server_ReplicaManager_Count_total{name=\"IsrExpandsPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval])) by (broker) >= 0", + "interval": "", + "legendFormat": "{{broker}}", + "refId": "A" + } + ], + "title": "Isr Expands Rate", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "ISR shrink rate. If a broker goes down, ISR for some of the partitions will shrink. When that broker is up again, ISR will be expanded once the replicas are fully caught up. Other than that, the expected value for both ISR shrink rate and expansion rate is 0", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 50 + }, + "id": 13, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.3.6", + 
"targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(idelta(kafka_server_ReplicaManager_Count_total{name=\"IsrShrinksPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval])) by (broker) >= 0", + "interval": "", + "legendFormat": "{{broker}}", + "refId": "A" + } + ], + "title": "Isr Shrinks Rate", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "refId": "A" + } + ], + "title": "In Sync Replica (ISR)", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 50 + }, + "id": 54, + "panels": [ + { + "datasource": { + "uid": "$datasource" + }, + "description": "Aggregate incoming messages rate per second", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 51 + }, + "id": 4, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + 
"displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"MessagesInPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[5m])) by (broker)", + "interval": "", + "legendFormat": "{{broker}}", + "refId": "A" + } + ], + "title": "Message Rate", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "refId": "A" + } + ], + "title": "Messages", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 51 + }, + "id": 55, + "panels": [ + { + "datasource": { + "uid": "$datasource" + }, + "description": "Total fetch request rate per second", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 52 + 
}, + "id": 27, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(abs(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"TotalFetchRequestsPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[5m]))) by (broker)", + "interval": "", + "legendFormat": "{{broker}}", + "refId": "A" + } + ], + "title": "Total Fetch Requests Rate", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Total produce request rate per second", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 52 + }, + "id": 18, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + 
"sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(abs(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"TotalProduceRequestsPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\",}[5m]))) by (broker)", + "interval": "", + "legendFormat": "{{broker}}", + "refId": "A" + } + ], + "title": "Total Produce Requests Rate", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Fetch request rate per second for requests that failed", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 59 + }, + "id": 11, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": 
"max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"FailedFetchRequestsPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[5m])) by (broker)", + "interval": "", + "legendFormat": "{{broker}}", + "refId": "A" + } + ], + "title": "Failed Fetch Requests Rate", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Produce request rate per second for requests that failed", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 59 + }, + "id": 28, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"FailedProduceRequestsPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[5m])) by (broker)", + "interval": 
"", + "legendFormat": "{{broker}}", + "refId": "A" + } + ], + "title": "Failed Produce Requests Rate", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Size of fetch queue. May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 66 + }, + "id": 30, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(idelta(kafka_server_Fetch_queue_size{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval])) by (broker)", + "interval": "", + "legendFormat": "{{broker}}", + "refId": "A" + } + ], + "title": "Fetch Queue Size", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Size of request queue. 
A congested request queue will not be able to process incoming or outgoing requests", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 66 + }, + "id": 29, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(idelta(kafka_network_RequestChannel_Value{name=\"RequestQueueSize\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval])) by (broker)", + "interval": "", + "legendFormat": "{{broker}}", + "refId": "A" + } + ], + "title": "Request Queue Size", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "refId": "A" + } + ], + "title": "Requests", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": 
"PC3E95692D54ABCC0" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 52 + }, + "id": 56, + "panels": [ + { + "datasource": { + "uid": "$datasource" + }, + "description": "Number of live threads, including both daemon and non-daemon threads. May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 53 + }, + "id": 3, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(java_Threading_ThreadCount{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", + "interval": "", + "legendFormat": "{{broker}}", + "refId": "A" + } + ], + "title": "Thread Count", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Total number of threads created 
and also started since the JVM started. May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 53 + }, + "id": 14, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(java_Threading_TotalStartedThreadCount{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", + "interval": "", + "legendFormat": "{{broker}}", + "refId": "A" + } + ], + "title": "Total Started Thread Count", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "Information about configured and dead Log Cleaner and Replica Fetcher threads. 
May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.hidden", + "value": true + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "broker" + }, + "properties": [ + { + "id": "displayName", + "value": "Broker" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "displayName", + "value": "Log Cleaner Threads Count" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "displayName", + "value": "Log Cleaner Dead Threads Count" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #C" + }, + "properties": [ + { + "id": "displayName", + "value": "Replica Fetcher Threads Count" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #D" + }, + "properties": [ + { + "id": "displayName", + "value": "Replica Fetcher Dead Threads Count" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align" + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 59 + }, + "id": 100, + "interval": "$inter", + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + 
"sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_log_cleaner_threads_count{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_log_LogCleaner_Value{name=\"DeadThreadCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_replica_fetcher_threads_count{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "C" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(kafka_server_ReplicaFetcherManager_Value{name=\"DeadThreadCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "D" + } + ], + "timeFrom": "5m", + "title": "Log Cleaner And Replica Fetcher Threads", + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [] + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC3E95692D54ABCC0" + }, + "refId": "A" + } + ], + "title": "Threads", + "type": "row" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "kafka", + "prometheus" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Platform Monitoring Prometheus", + "value": "PC3E95692D54ABCC0" + }, + "hide": 0, + "includeAll": false, + "label": "Cloud", + "multi": false, + "name": "datasource", + "options": [], + 
"query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "$datasource" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": { + "query": "label_values(kafka_controller_KafkaController_Value, cluster)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "kafka-service", + "value": "kafka-service" + }, + "datasource": { + "uid": "$datasource" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(kafka_controller_KafkaController_Value{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "uid": "$datasource" + }, + "definition": "", + "hide": 0, + "includeAll": true, + "label": "Broker", + "multi": true, + "name": "broker", + "options": [], + "query": "label_values(kafka_controller_KafkaController_Value{namespace=\"$namespace\", cluster=\"$cluster\"}, broker)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "auto": true, + "auto_count": 30, + "auto_min": "120s", + "current": { + "selected": false, + "text": "auto", + "value": "$__auto_interval_inter" + }, + "hide": 0, + "label": "Sampling", + "name": "inter", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_inter" + }, + 
{ + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka Monitoring", + "uid": "e31fe3150b0d4da69b5147cdf7f60ded0a6a0445", + "version": 4, + "weekStart": "" + } +--- +# Source: kafka-service/templates/kafka-topics-dashboard.yaml +apiVersion: integreatly.org/v1alpha1 +kind: GrafanaDashboard +metadata: + name: kafka-topics-grafana-dashboard + labels: + app.kubernetes.io/version: '' + app.kubernetes.io/part-of: 'kafka-services' + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: 'backend' + app: grafana +spec: + name: kafka-topics-dashboard.json + json: > + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Kafka Topics", + "editable": 
true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 389, + "links": [], + "panels": [ + { + "collapsed": true, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 3, + "panels": [ + { + "datasource": { + "uid": "$datasource" + }, + "description": "The count of partitions, number of messages and size in bytes for each topic in descending order of size values", + "fieldConfig": { + "defaults": { + "color": { + "mode": "fixed" + }, + "custom": { + "align": "center", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Size" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 20, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(sum(kafka_log_Log_Value{name=\"LogEndOffset\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, broker) - sum(kafka_log_Log_Value{name=\"LogStartOffset\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, broker)) by (topic)", + "format": "table", + "instant": true, + "range": false, + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(sum(kafka_log_Log_Value{name=\"Size\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, broker)) by (topic)", + "format": "table", + "instant": true, + "range": false, + "refId": "B" + }, + { + "datasource": { + "uid": "$datasource" + 
}, + "expr": "max(count(kafka_log_Log_Value{name=\"Size\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, broker)) by (topic)", + "format": "table", + "instant": true, + "range": false, + "refId": "C" + } + ], + "title": "Topics", + "transformations": [ + { + "id": "merge", + "options": { + + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "Time": 0, + "Value #A": 3, + "Value #B": 4, + "Value #C": 2, + "topic": 1 + }, + "renameByName": { + "Value #A": "Number of Messages", + "Value #B": "Size", + "Value #C": "Partitions Count", + "topic": "Topic Name" + } + } + }, + { + "id": "sortBy", + "options": { + "fields": { + + }, + "sort": [ + { + "desc": true, + "field": "Size" + } + ] + } + } + ], + "type": "table" + } + ], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "Overview", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 61, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "The list of topic partitions whose in-sync replicas (ISR) count is less than `minIsr` for each broker", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 2 + }, + "id": 66, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + 
}, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kafka_cluster_Partition_Value{name=\"UnderMinIsr\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, partition) == 1", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Under Min ISR Partitions Table", + "transformations": [ + { + "id": "merge", + "options": { + + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true + }, + "includeByName": { + + }, + "indexByName": { + "Time": 0, + "Value": 4, + "broker": 2, + "partition": 3, + "topic": 1 + }, + "renameByName": { + "Time": "", + "broker": "Broker", + "partition": "Partition", + "topic": "Topic" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "The number of topic under-replicated partitions for each broker", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "center", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 2 + }, + "id": 65, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 1, + "showHeader": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "exemplar": false, + "expr": 
"sum(kafka_cluster_Partition_Value{name=\"UnderReplicated\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, partition) == 1", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Under Replicated Partitions Table", + "transformations": [ + { + "id": "merge", + "options": { + + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true + }, + "includeByName": { + + }, + "indexByName": { + "Time": 0, + "Value": 4, + "broker": 2, + "partition": 3, + "topic": 1 + }, + "renameByName": { + "Time": "", + "broker": "Broker", + "partition": "Partition", + "topic": "Topic" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "List of topics with unclean leader election enabled. Unclean leader elections can lead to data loss, so you should check these topics to see if this setting is set reasonably. 
May be not applicable for managed external Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "center", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 2 + }, + "id": 67, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 1, + "showHeader": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kafka_cluster_unclean_election_topics{namespace=\"$namespace\", topic=~\"$topic\"}) by (topic)", + "format": "table", + "instant": true, + "legendFormat": "{{ topic }} - {{ broker }}", + "range": false, + "refId": "A" + } + ], + "title": "Unclean Election Leader Topics Table", + "transformations": [ + { + "id": "merge", + "options": { + + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true + }, + "includeByName": { + + }, + "indexByName": { + + }, + "renameByName": { + "Time": "", + "topic": "Topic" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "The total size in bytes of volume space occupied by specific topic for each broker", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": 
false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 31, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(kafka_log_Log_Value{name=\"Size\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, broker)", + "format": "time_series", + "legendFormat": "{{ topic }} - {{ broker }}", + "refId": "A" + } + ], + "title": "Partition Data Size", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "The number of topic partitions whose in-sync replicas (ISR) count is less than `minIsr` for each broker", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 63, + "interval": "$inter", + "maxDataPoints": 100, + "options": { + "legend": { + "calcs": [ + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(kafka_cluster_Partition_Value{name=\"UnderMinIsr\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, broker)", + "format": "time_series", + "legendFormat": "{{ topic }} - {{ broker }}", + "range": true, + "refId": "A" + } + ], + "title": "Under Min ISR Partitions", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "Topic Issues", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 23, + "panels": [ + { + "datasource": { + "uid": "$datasource" + }, + "description": "The incoming messages rate by specific topic for each broker. 
`No Data` for specific topic means that there are no operations performed on the topic", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 40, + "interval": "$inter", + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"MessagesInPerSec\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}[5m])) by (topic, broker)", + "format": "time_series", + "legendFormat": "{{ topic }} - {{ broker }}", + "range": true, + "refId": "A" + } + ], + "title": "Incoming Messages Rate", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "The incoming bytes rate by specific topic for each broker. 
`No Data` for specific topic means that there are no operations performed on the topic", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 50, + "interval": "$inter", + "links": [], + "maxDataPoints": 100, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"BytesInPerSec\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}[5m])) by (topic, broker)", + "format": "time_series", + "legendFormat": "{{ topic }} - {{ broker }}", + "refId": "A" + } + ], + "title": "Incoming Bytes Rate", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "The outgoing bytes rate by specific topic for each broker. 
`No Data` for specific topic means that there are no operations performed on the topic", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 60, + "interval": "$inter", + "links": [], + "maxDataPoints": 100, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"BytesOutPerSec\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}[5m])) by (topic, broker)", + "format": "time_series", + "legendFormat": "{{ topic }} - {{ broker }}", + "refId": "A" + } + ], + "title": "Outgoing Bytes Rate", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "The total size in bytes of volume space occupied by specific topic for each broker", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 68, + "interval": "$inter", + "links": [], + "maxDataPoints": 100, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(kafka_log_Log_Value{name=\"Size\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, broker)", + "format": "time_series", + "legendFormat": "{{ topic }} - {{ broker }}", + "refId": "A" + } + ], + "title": "Partition Data Size", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "Topics Information", + "type": "row" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "kafka", + "prometheus", + "kafka_topics" + ], + "templating": { + "list": [ + { + "hide": 0, + "includeAll": false, + "label": "Cloud", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": 
false, + "type": "datasource" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "$datasource" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": "label_values(kafka_controller_KafkaController_Value, cluster)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "datasource": { + "uid": "$datasource" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(kafka_controller_KafkaController_Value{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "uid": "$datasource" + }, + "definition": "", + "hide": 0, + "includeAll": true, + "label": "Broker", + "multi": true, + "name": "broker", + "options": [], + "query": "label_values(kafka_controller_KafkaController_Value{namespace=\"$namespace\", cluster=\"$cluster\"}, broker)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "uid": "$datasource" + }, + "definition": "query_result(count by (topic)(count_over_time(kafka_log_Log_Value{namespace=\"$namespace\", topic=~\"$topics_regex\", cluster=\"$cluster\", name=\"Size\"}[1h])))", + "hide": 0, + "includeAll": true, + "label": "Topic", + "multi": true, + "name": "topic", + "options": [], + "query": { + "qryType": 3, + "query": "query_result(count by 
(topic)(count_over_time(kafka_log_Log_Value{namespace=\"$namespace\", topic=~\"$topics_regex\", cluster=\"$cluster\", name=\"Size\"}[1h])))", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "/topic=\"(?\u003Ctext\u003E[^\"]+)/g", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": ".*", + "value": ".*" + }, + "datasource": "$datasource", + "hide": 0, + "label": "Topics Regex", + "name": "topics_regex", + "options": [ + { + "selected": true, + "text": ".*", + "value": ".*" + } + ], + "query": ".*", + "skipUrlSync": false, + "type": "textbox" + }, + { + "auto": true, + "auto_count": 30, + "auto_min": "120s", + "current": { + "selected": false, + "text": "auto", + "value": "$__auto_interval_inter" + }, + "datasource": "$datasource", + "hide": 0, + "label": "Sampling", + "name": "inter", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_inter" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "2m", + "value": "2m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,2m,5m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + 
"timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka Topics", + "uid": "d16460083", + "version": 4, + "weekStart": "" + } +--- +# Source: kafka-service/templates/cr.yaml +apiVersion: netcracker.com/v7 +kind: KafkaService +metadata: + name: kafka + labels: + app.kubernetes.io/version: '' + app.kubernetes.io/part-of: 'kafka-services' + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: 'backend' + app.kubernetes.io/instance: kafka-montemplate + app.kubernetes.io/processed-by-operator: kafka-service-operator + annotations: + netcracker.com/reconcile-trigger: "D94QmztZ0B" +spec: + global: + waitForPodsReady: true + podReadinessTimeout: 600 + kraft: + enabled: false + defaultLabels: + app.kubernetes.io/version: '' + app.kubernetes.io/part-of: 'kafka-services' + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: 'backend' + app.kubernetes.io/managed-by: operator + app.kubernetes.io/managed-by-operator: kafka-service-operator + kafkaSaslMechanism: SCRAM-SHA-512 + kafkaSsl: + enabled: false + secretName: "" + monitoring: + dockerImage: ghcr.io/netcracker/qubership-kafka-monitoring:main + minVersion: 2.0.0 + maxVersion: 3.x.x + dataCollectionInterval: 10s + kafkaExecPluginTimeout: 10s + kafkaTotalBrokerCount: 3 + secretName: kafka-monitoring-secret + securityContext: + runAsNonRoot: true + seccompProfile: + type: "RuntimeDefault" + resources: + requests: + memory: 128Mi + cpu: 50m + limits: + memory: 256Mi + cpu: 200m + monitoringType: prometheus + akhq: + dockerImage: ghcr.io/netcracker/qubership-docker-akhq:main + bootstrapServers: kafka:9092 + kafkaPollTimeout: 10000 + enableAccessLog: false + securityContext: + runAsNonRoot: true + seccompProfile: + type: "RuntimeDefault" + ldap: + enabled: false + heapSize: 300 + 
resources: + requests: + memory: 600Mi + cpu: 50m + limits: + memory: 1200Mi + cpu: 400m + schemaRegistryType: confluent +--- +# Source: kafka-service/templates/prometheus_rules.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + app.kubernetes.io/version: '' + app.kubernetes.io/part-of: 'kafka-services' + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: monitoring + prometheus: Kafka-rules + role: alert-rules + name: prometheus-kafka-service-rules +spec: + groups: + - name: default-kafka-montemplate + rules: + - alert: KafkaIsDegradedAlert + annotations: + description: 'Kafka is Degraded' + summary: Some of Kafka Service pods are down + expr: kafka_cluster_status{namespace="default",container="kafka-monitoring"} == 6 + for: 3m + labels: + severity: warning + namespace: default + service: kafka-montemplate + - alert: KafkaMetricsAreAbsent + annotations: + description: 'Kafka metrics are absent on default.' + summary: Kafka metrics are absent + expr: absent(kafka_cluster_status{namespace="default"}) == 1 + for: 3m + labels: + severity: warning + namespace: default + service: kafka-montemplate + - alert: KafkaIsDownAlert + annotations: + description: 'Kafka is Down' + summary: All of Kafka Service pods are down + expr: kafka_cluster_status{namespace="default",container="kafka-monitoring"} == 10 + for: 3m + labels: + severity: critical + namespace: default + service: kafka-montemplate + - alert: KafkaCPUUsageAlert + annotations: + description: 'Kafka CPU usage is higher than 95 percents' + summary: Some of Kafka Service pods load CPU higher then 95 percents + expr: max(rate(container_cpu_usage_seconds_total{namespace="default",pod=~"kafka-[0-9].*",container="kafka"}[5m])) / max(kube_pod_container_resource_limits_cpu_cores{exported_namespace="default",exported_pod=~"kafka-[0-9].*"}) > 0.95 + for: 3m + labels: + severity: warning + namespace: default + service: kafka-montemplate + - alert: KafkaMemoryUsageAlert + 
annotations: + description: 'Kafka memory usage is higher than 95 percents' + summary: Some of Kafka Service pods use memory higher then 95 percents + expr: max(container_memory_working_set_bytes{namespace="default",pod=~"kafka-[0-9].*",container="kafka"}) / max(kube_pod_container_resource_limits_memory_bytes{exported_namespace="default",exported_pod=~"kafka-[0-9].*"}) > 0.95 + for: 3m + labels: + severity: warning + namespace: default + service: kafka-montemplate + - alert: KafkaHeapMemoryUsageAlert + annotations: + description: 'Kafka heap memory usage is higher than 95 percents' + summary: Some of Kafka Service pods use heap memory higher then 95 percents + expr: max(java_Memory_HeapMemoryUsage_used{namespace="default",broker=~"kafka-[0-9].*"}) / max(java_Memory_HeapMemoryUsage_max{namespace="default", broker=~"kafka-[0-9].*"}) > 0.95 + for: 3m + labels: + severity: warning + namespace: default + service: kafka-montemplate + - alert: KafkaGCCountAlert + annotations: + description: 'Some of Kafka Service pods have Garbage collections count rate higher than 10' + summary: Some of Kafka Service pods have Garbage collections count rate higher than 10 + expr: max(rate(java_GarbageCollector_CollectionCount_total{namespace="default", broker=~"kafka-[0-9].*"}[5m])) > 10 + for: 3m + labels: + severity: warning + namespace: default + service: kafka-montemplate + - alert: KafkaLagAlert + annotations: + description: 'Some of Kafka Service pods have partition lag higher than 1000' + summary: Some of Kafka Service pods have partition lag higher than 1000 + expr: max(kafka_consumergroup_group_lag{namespace="default"}) > 1000 + for: 3m + labels: + severity: warning + namespace: default + service: kafka-montemplate + - alert: KafkaPartitionCountAlert + annotations: + description: 'Kafka Partition count for {{ $labels.broker }} broker is higher than 4000' + summary: Some of Kafka Partition count is higher than 4000 + expr: kafka_server_ReplicaManager_Value{name="PartitionCount", 
namespace="default", broker=~"kafka-[0-9].*"} > 4000 + for: 3m + labels: + severity: warning + namespace: default + service: kafka-montemplate + - alert: KafkaBrokerSkewAlert + annotations: + description: 'Kafka Broker Skew for {{ $labels.broker }} broker is higher than 50%' + summary: Some of Kafka Broker Skew is higher than 50% + expr: (kafka_broker_skew{namespace="default", container="kafka-monitoring", broker=~"kafka-[0-9].*"} > 50) and on(broker, namespace) (kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="default", broker=~"kafka-[0-9].*"} > 3) + for: 3m + labels: + severity: warning + namespace: default + service: kafka-montemplate + - alert: KafkaBrokerLeaderSkewAlert + annotations: + description: 'Kafka Broker Leader Skew for {{ $labels.broker }} broker is higher than 50%' + summary: Some of Kafka Broker Leader Skew is higher than 50% + expr: (kafka_broker_leader_skew{namespace="default", container="kafka-monitoring", broker=~"kafka-[0-9].*"} > 50) and on(broker, namespace) (kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="default", broker=~"kafka-[0-9].*"} > 3) + for: 3m + labels: + severity: warning + namespace: default + service: kafka-montemplate + - alert: SupplementaryServicesCompatibilityAlert + annotations: + description: 'Kafka supplementary services in namespace {{ $labels.namespace }} is not compatible with Kafka version {{ $labels.application_version }}' + summary: 'Kafka supplementary services in namespace {{ $labels.namespace }} is not compatible with Kafka version {{ $labels.application_version }}, allowed range is {{ $labels.min_version }} - {{ $labels.max_version }}' + expr: supplementary_services_version_compatible{application="kafka", namespace="default"} != 1 + for: 3m + labels: + severity: warning + namespace: default + service: kafka-montemplate +--- +# Source: kafka-service/templates/tls_static_metrics.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + 
app.kubernetes.io/version: '' + app.kubernetes.io/part-of: 'kafka-services' + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: monitoring + prometheus: kafka-tls-static-metrics-rules + role: record-rules + name: kafka-tls-static-metrics-rules +spec: + groups: + - name: default-kafka-montemplate + rules: + - expr: 0 + labels: + namespace: "default" + application: "kafka-service" + service: "kafka" + + record: service:tls_status:info +--- +# Source: kafka-service/templates/service_monitor.yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: kafka-service-monitor + labels: + app.kubernetes.io/version: '' + app.kubernetes.io/part-of: 'kafka-services' + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: kafka-service-monitor + app.kubernetes.io/component: monitoring +spec: + endpoints: + - interval: 60s + scrapeTimeout: 10s + port: prometheus-cli + scheme: http + jobLabel: k8s-app + namespaceSelector: + matchNames: + - default + selector: + matchLabels: + component: kafka-monitoring + name: kafka-monitoring +--- +# Source: kafka-service/templates/service_monitor_jmx_exporter.yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: kafka-service-monitor-jmx-exporter + labels: + app.kubernetes.io/version: '' + app.kubernetes.io/part-of: 'kafka-services' + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: kafka-service-monitor-jmx-exporter + app.kubernetes.io/component: monitoring +spec: + endpoints: + - interval: 60s + scrapeTimeout: 10s + port: prometheus-http + scheme: http + basicAuth: + username: + name: kafka-secret + key: client-username + password: + name: kafka-secret + key: client-password + jobLabel: k8s-app + namespaceSelector: + matchNames: + - default + selector: + matchLabels: + component: kafka + clusterName: kafka +--- +# Source: kafka-service/templates/pre-deploy/ownerref-migrator-sa.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: 
kafka-services-ownerref-migrator + labels: + app.kubernetes.io/instance: kafka-montemplate + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-weight": "-200" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +--- +# Source: kafka-service/templates/pre-deploy/ownerref-migrator-rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: kafka-services-ownerref-migrator + labels: + app.kubernetes.io/instance: kafka-montemplate + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-weight": "-190" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +rules: + - apiGroups: ["apps"] + resources: ["deployments","statefulsets"] + verbs: ["get","list","patch","update"] + - apiGroups: [""] + resources: ["configmaps","secrets","services","persistentvolumeclaims"] + verbs: ["get","list","patch","update"] +--- +# Source: kafka-service/templates/pre-deploy/ownerref-migrator-rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: kafka-services-ownerref-migrator + labels: + app.kubernetes.io/instance: kafka-montemplate + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-weight": "-180" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + "argocd.argoproj.io/hook": PreSync + "argocd.argoproj.io/hook-delete-policy": HookSucceeded +subjects: + - kind: ServiceAccount + name: kafka-services-ownerref-migrator + namespace: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: kafka-services-ownerref-migrator +--- +# Source: kafka-service/templates/pre-deploy/ownerref-migrator-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: kafka-services-ownerref-migrator + labels: + app.kubernetes.io/instance: kafka-montemplate + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-weight": "0" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + template: + 
metadata: + labels: + app.kubernetes.io/instance: kafka-montemplate + spec: + serviceAccountName: kafka-services-ownerref-migrator + restartPolicy: OnFailure + securityContext: + runAsNonRoot: true + seccompProfile: { type: RuntimeDefault } + containers: + - name: migrator + image: ghcr.io/netcracker/qubership-docker-kubectl:main + imagePullPolicy: Always + command: ["/bin/sh","-c"] + args: + - | + set -euo pipefail + + # Ensure jq is present + command -v jq >/dev/null 2>&1 || { echo "[migrator] jq is required"; exit 1; } + + # Config + KUBECTL="kubectl" + NS="default" + RESOURCES="${RESOURCES:-deployments,statefulsets,configmaps,secrets,services,persistentvolumeclaims}" + SELECTOR="" + OLD_GROUP="qubership.org/" + + # Build optional selector flag + if [ -n "${SELECTOR}" ]; then SEL="-l ${SELECTOR}"; else SEL=""; fi + + IFS=','; for r in $RESOURCES; do + r="$(echo "$r" | xargs)"; [ -z "$r" ] && continue + + # Get items JSON; on RBAC/list error warn and continue + ITEMS_JSON="$($KUBECTL -n "$NS" get "$r" $SEL -o json 2>/dev/null || true)" + if [ -z "$ITEMS_JSON" ]; then + echo "[migrator][WARN] cannot list $r (RBAC or not found)"; continue + fi + + # Extract names having ownerRefs with apiVersion starting with OLD_GROUP + NAMES="$(printf "%s" "$ITEMS_JSON" \ + | jq -r --arg grp "$OLD_GROUP" '.items[] + | select([(.metadata.ownerReferences // [])[]? 
+ | .apiVersion|tostring + | startswith($grp)] | any) + | .metadata.name')" + + [ -z "$NAMES" ] && continue + # Iterate names without here-strings (portable for /bin/sh) + printf "%s\n" "$NAMES" | while IFS= read -r name; do + [ -z "$name" ] && continue + + OBJ_JSON="$($KUBECTL -n "$NS" get "$r" "$name" -o json 2>/dev/null || true)" + if [ -z "$OBJ_JSON" ]; then + echo "[migrator][WARN] cannot get $r/$name"; continue + fi + + # Build merge-patch that removes matching ownerRefs + PATCH="$(printf "%s" "$OBJ_JSON" | jq -c --arg grp "$OLD_GROUP" ' + {"metadata":{"ownerReferences":[ + (.metadata.ownerReferences // [])[] + | select((.apiVersion|tostring)|startswith($grp)|not) + ]}}')" + + if $KUBECTL -n "$NS" patch "$r" "$name" --type=merge -p "$PATCH" >/dev/null 2>&1; then + echo "[migrator] $r/$name patched" + else + echo "[migrator][WARN] patch failed for $r/$name" + fi + done + done + + echo "[migrator] done" + resources: + limits: + cpu: 100m + memory: 256Mi + requests: + cpu: 20m + memory: 64Mi + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: { drop: ["ALL"] } diff --git a/operator/tests/alerts-tests/test.yaml b/operator/tests/alerts-tests/test.yaml new file mode 100644 index 00000000..8d697449 --- /dev/null +++ b/operator/tests/alerts-tests/test.yaml @@ -0,0 +1,356 @@ +rule_files: +- rules.yaml +evaluation_interval: 1m +tests: +- interval: 1m + input_series: + - series: kafka_cluster_status{namespace="default",container="kafka-monitoring"} + values: "6x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaIsDegradedAlert + exp_alerts: + - exp_labels: + severity: warning + namespace: default + service: kafka-montemplate + container: kafka-monitoring + exp_annotations: + description: Kafka is Degraded + summary: Some of Kafka Service pods are down + +- interval: 1m + input_series: + - series: kafka_cluster_status{namespace="default",container="kafka-monitoring"} + values: 
"0x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaIsDegradedAlert + exp_alerts: [] + +- interval: 1m + input_series: + - series: kafka_cluster_status + values: "0x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaMetricsAreAbsent + exp_alerts: + - exp_labels: + severity: warning + namespace: default + service: kafka-montemplate + exp_annotations: + description: Kafka metrics are absent on default. + summary: Kafka metrics are absent + +- interval: 1m + input_series: + - series: kafka_cluster_status{namespace="default"} + values: "0x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaMetricsAreAbsent + exp_alerts: [] + +- interval: 1m + input_series: + - series: kafka_cluster_status{namespace="default",container="kafka-monitoring"} + values: "10x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaIsDownAlert + exp_alerts: + - exp_labels: + severity: critical + namespace: default + service: kafka-montemplate + container: kafka-monitoring + exp_annotations: + description: Kafka is Down + summary: All of Kafka Service pods are down + +- interval: 1m + input_series: + - series: kafka_cluster_status{namespace="default",container="kafka-monitoring"} + values: "0x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaIsDownAlert + exp_alerts: [] + +- interval: 1m + input_series: + - series: container_cpu_usage_seconds_total{namespace="default",pod="kafka-0",container="kafka"} + values: "300+300x5" + - series: kube_pod_container_resource_limits_cpu_cores{exported_namespace="default",exported_pod="kafka-0"} + values: "1x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaCPUUsageAlert + exp_alerts: + - exp_labels: + severity: warning + namespace: default + service: kafka-montemplate + exp_annotations: + 
description: Kafka CPU usage is higher than 95 percents + summary: Some of Kafka Service pods load CPU higher then 95 percents + +- interval: 1m + input_series: + - series: container_cpu_usage_seconds_total{namespace="default",pod="kafka-0",container="kafka"} + values: "0x5" + - series: kube_pod_container_resource_limits_cpu_cores{exported_namespace="default",exported_pod="kafka-0"} + values: "1x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaCPUUsageAlert + exp_alerts: [] + +- interval: 1m + input_series: + - series: container_memory_working_set_bytes{namespace="default",pod="kafka-0",container="kafka"} + values: "1x5" + - series: kube_pod_container_resource_limits_memory_bytes{exported_namespace="default",exported_pod="kafka-0"} + values: "1x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaMemoryUsageAlert + exp_alerts: + - exp_labels: + severity: warning + namespace: default + service: kafka-montemplate + exp_annotations: + description: Kafka memory usage is higher than 95 percents + summary: Some of Kafka Service pods use memory higher then 95 percents + +- interval: 1m + input_series: + - series: container_memory_working_set_bytes{namespace="default",pod="kafka-0",container="kafka"} + values: "0x5" + - series: kube_pod_container_resource_limits_memory_bytes{exported_namespace="default",exported_pod="kafka-0"} + values: "1x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaMemoryUsageAlert + exp_alerts: [] + +- interval: 1m + input_series: + - series: java_Memory_HeapMemoryUsage_used{namespace="default",broker="kafka-0"} + values: "1x5" + - series: java_Memory_HeapMemoryUsage_max{namespace="default",broker="kafka-0"} + values: "1x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaHeapMemoryUsageAlert + exp_alerts: + - exp_labels: + severity: warning + namespace: default + 
service: kafka-montemplate + exp_annotations: + description: Kafka heap memory usage is higher than 95 percents + summary: Some of Kafka Service pods use heap memory higher then 95 percents + +- interval: 1m + input_series: + - series: java_Memory_HeapMemoryUsage_used{namespace="default",broker="kafka-0"} + values: "0x5" + - series: java_Memory_HeapMemoryUsage_max{namespace="default",broker="kafka-0"} + values: "1x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaHeapMemoryUsageAlert + exp_alerts: [] + +- interval: 1m + input_series: + - series: java_GarbageCollector_CollectionCount_total{namespace="default", broker="kafka-0"} + values: "3001+3001x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaGCCountAlert + exp_alerts: + - exp_labels: + severity: warning + namespace: default + service: kafka-montemplate + exp_annotations: + description: Some of Kafka Service pods have Garbage collections count rate higher than 10 + summary: Some of Kafka Service pods have Garbage collections count rate higher than 10 + +- interval: 1m + input_series: + - series: java_GarbageCollector_CollectionCount_total{namespace="default", broker="kafka-0"} + values: "0x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaGCCountAlert + exp_alerts: [] + +- interval: 1m + input_series: + - series: kafka_consumergroup_group_lag{namespace="default"} + values: "1001x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaLagAlert + exp_alerts: + - exp_labels: + severity: warning + namespace: default + service: kafka-montemplate + exp_annotations: + description: Some of Kafka Service pods have partition lag higher than 1000 + summary: Some of Kafka Service pods have partition lag higher than 1000 + +- interval: 1m + input_series: + - series: kafka_consumergroup_group_lag{namespace="default"} + values: "0x5" + 
alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaLagAlert + exp_alerts: [] + +- interval: 1m + input_series: + - series: kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="default", broker="kafka-0"} + values: "4001x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaPartitionCountAlert + exp_alerts: + - exp_labels: + severity: warning + namespace: default + service: kafka-montemplate + broker: kafka-0 + name: PartitionCount + exp_annotations: + description: Kafka Partition count for kafka-0 broker is higher than 4000 + summary: Some of Kafka Partition count is higher than 4000 + +- interval: 1m + input_series: + - series: kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="default", broker="kafka-0"} + values: "0x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaPartitionCountAlert + exp_alerts: [] + +- interval: 1m + input_series: + - series: kafka_broker_skew{namespace="default", container="kafka-monitoring", broker="kafka-0"} + values: "51x5" + - series: kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="default", broker="kafka-0"} + values: "4x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaBrokerSkewAlert + exp_alerts: + - exp_labels: + severity: warning + namespace: default + service: kafka-montemplate + broker: kafka-0 + container: kafka-monitoring + exp_annotations: + description: Kafka Broker Skew for kafka-0 broker is higher than 50% + summary: Some of Kafka Broker Skew is higher than 50% + +- interval: 1m + input_series: + - series: kafka_broker_skew{namespace="default", container="kafka-monitoring", broker="kafka-0"} + values: "0x5" + - series: kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="default", broker="kafka-0"} + values: "0x5" + alert_rule_test: + - eval_time: 5m + groupname: 
default-kafka-montemplate + alertname: KafkaBrokerSkewAlert + exp_alerts: [] + +- interval: 1m + input_series: + - series: kafka_broker_leader_skew{namespace="default", container="kafka-monitoring", broker="kafka-0"} + values: "51x5" + - series: kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="default", broker="kafka-0"} + values: "4x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaBrokerLeaderSkewAlert + exp_alerts: + - exp_labels: + severity: warning + namespace: default + service: kafka-montemplate + broker: kafka-0 + container: kafka-monitoring + exp_annotations: + description: Kafka Broker Leader Skew for kafka-0 broker is higher than 50% + summary: Some of Kafka Broker Leader Skew is higher than 50% + +- interval: 1m + input_series: + - series: kafka_broker_leader_skew{namespace="default", container="kafka-monitoring", broker="kafka-0"} + values: "0x5" + - series: kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="default", broker="kafka-0"} + values: "0x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: KafkaBrokerLeaderSkewAlert + exp_alerts: [] + +- interval: 1m + input_series: + - series: supplementary_services_version_compatible{application="kafka", namespace="default"} + values: "0x5" + alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: SupplementaryServicesCompatibilityAlert + exp_alerts: + - exp_labels: + severity: warning + namespace: default + service: kafka-montemplate + application: kafka + exp_annotations: + description: "Kafka supplementary services in namespace default is not compatible with Kafka version " + summary: "Kafka supplementary services in namespace default is not compatible with Kafka version , allowed range is - " + +- interval: 1m + input_series: + - series: supplementary_services_version_compatible{application="kafka", namespace="default"} + values: "1x5" + 
alert_rule_test: + - eval_time: 5m + groupname: default-kafka-montemplate + alertname: SupplementaryServicesCompatibilityAlert + exp_alerts: [] \ No newline at end of file diff --git a/operator/tests/alerts-tests/tests-checker.sh b/operator/tests/alerts-tests/tests-checker.sh new file mode 100644 index 00000000..21fa71e8 --- /dev/null +++ b/operator/tests/alerts-tests/tests-checker.sh @@ -0,0 +1,33 @@ +rules=() +readarray -t rules < <(yq eval '.groups[].rules[].alert' ./rules.yaml) +tests=() +readarray -t tests < <(yq '.tests[].alert_rule_test[].alertname' ./test.yaml) +errorrules=() +errorcount=() +i=0 + +for item in "${rules[@]}"; do +count=0 + + for j in "${tests[@]}"; do + if [[ "$j" == "$item" ]]; then + ((count++)) + fi + done +if [[ "$count" -lt 2 ]]; then +errorrules[i]="$item" +errorcount[i]="$count" +((i++)) +fi +done + +if [[ "$i" -gt 0 ]]; then +echo "This alert rules dont have all required tests (minimum 2 tests per rule needed):" + for k in "${!errorrules[@]}"; do + echo "Alert: ${errorrules[k]}, Tests found: ${errorcount[k]}" + done +exit 1 +else +echo "All alert rules has required tests" +exit 0 +fi \ No newline at end of file From a4099a67ed51df480ea99ed722968bd959febd68 Mon Sep 17 00:00:00 2001 From: FedorProshin Date: Mon, 17 Nov 2025 17:16:37 +0300 Subject: [PATCH 2/4] feat: added backward compatibility in values yaml for alerts, added alerts test using vmalert tool - fixed linter errors --- .github/workflows/alerts-test.yml | 3 + operator/charts/helm/kafka-service/Chart.yaml | 3 +- operator/tests/alerts-tests/rules.yaml | 9251 ----------------- 3 files changed, 5 insertions(+), 9252 deletions(-) delete mode 100644 operator/tests/alerts-tests/rules.yaml diff --git a/.github/workflows/alerts-test.yml b/.github/workflows/alerts-test.yml index ca272ae3..7b0227ef 100644 --- a/.github/workflows/alerts-test.yml +++ b/.github/workflows/alerts-test.yml @@ -12,6 +12,9 @@ env: max_attempts: 30 delay: 10 +permissions: + contents: read + jobs: 
Run-Alerts-Test: runs-on: ubuntu-latest diff --git a/operator/charts/helm/kafka-service/Chart.yaml b/operator/charts/helm/kafka-service/Chart.yaml index 45ad2420..d4e24990 100644 --- a/operator/charts/helm/kafka-service/Chart.yaml +++ b/operator/charts/helm/kafka-service/Chart.yaml @@ -25,4 +25,5 @@ dependencies: - name: prometheusrules condition: monitoring.install version: ~0 - repository: "file://charts/prometheusrules" \ No newline at end of file + repository: "file://charts/prometheusrules" + \ No newline at end of file diff --git a/operator/tests/alerts-tests/rules.yaml b/operator/tests/alerts-tests/rules.yaml deleted file mode 100644 index 741050b3..00000000 --- a/operator/tests/alerts-tests/rules.yaml +++ /dev/null @@ -1,9251 +0,0 @@ ---- -# Source: kafka-service/templates/operator/service_account.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: kafka-service-operator - labels: - app.kubernetes.io/version: '' - app.kubernetes.io/part-of: 'kafka-services' - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: 'backend' ---- -# Source: kafka-service/templates/akhq-secret.yaml -apiVersion: v1 -kind: Secret -metadata: - name: akhq-secret - labels: - app.kubernetes.io/version: '' - app.kubernetes.io/part-of: 'kafka-services' - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: 'backend' -type: Opaque -stringData: - akhq_default_user: "" - akhq_default_password: "" ---- -# Source: kafka-service/templates/kafka-monitoring-secret.yaml -apiVersion: v1 -kind: Secret -metadata: - name: kafka-monitoring-secret - labels: - app.kubernetes.io/version: '' - app.kubernetes.io/part-of: 'kafka-services' - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: 'backend' -type: Opaque -stringData: - prometheus-username: "" - prometheus-password: "" ---- -# Source: kafka-service/templates/kafka-services-secret.yaml -apiVersion: v1 -kind: Secret -metadata: - name: kafka-services-secret - labels: - automation.infra/secret-change: 
"true" - app.kubernetes.io/version: '' - app.kubernetes.io/part-of: 'kafka-services' - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: 'backend' - annotations: - kafkaservice.netcracker.com/auto-restart: "true" -type: Opaque -stringData: - admin-username: "" - admin-password: "" - client-username: "" - client-password: "" ---- -# Source: kafka-service/templates/kafka-monitoring-configuration.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: kafka-monitoring-configuration - labels: - app.kubernetes.io/version: '' - app.kubernetes.io/part-of: 'kafka-services' - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: 'backend' - name: kafka-monitoring - component: kafka-monitoring -data: - config: |- - # Telegraf Configuration - # - # Telegraf is entirely plugin driven. All metrics are gathered from the - # declared inputs, and sent to the declared outputs. - # - # Plugins must be declared in here to be active. - # To deactivate a plugin, comment out the name and any variables. - # - # Use 'telegraf -config telegraf.conf -test' to see what metrics a config - # file would generate. - # - # Environment variables can be used anywhere in this config file, simply prepend - # them with $. For strings the variable must be within quotes (ie, "$STR_VAR"), - # for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR) - - - # Global tags can be specified here in key="value" format. - [global_tags] - # dc = "us-east-1" # will tag all metrics with dc=us-east-1 - # rack = "1a" - ## Environment variables can be used as tags, and throughout the config file - # user = "$USER" - project_name= "$OS_PROJECT" - - - # Configuration for telegraf agent - [agent] - ## Default data collection interval for all inputs - interval = "$DATA_COLLECTION_INTERVAL" - ## Rounds collection interval to 'interval' - ## ie, if interval="10s" then always collect on :00, :10, :20, etc. 
- round_interval = true - - ## Telegraf will send metrics to outputs in batches of at most - ## metric_batch_size metrics. - ## This controls the size of writes that Telegraf sends to output plugins. - metric_batch_size = 1000 - - ## For failed writes, telegraf will cache metric_buffer_limit metrics for each - ## output, and will flush this buffer on a successful write. Oldest metrics - ## are dropped first when this buffer fills. - ## This buffer only fills when writes fail to output plugin(s). - metric_buffer_limit = 10000 - - ## Collection jitter is used to jitter the collection by a random amount. - ## Each plugin will sleep for a random time within jitter before collecting. - ## This can be used to avoid many plugins querying things like sysfs at the - ## same time, which can have a measurable effect on the system. - collection_jitter = "0s" - - ## Default flushing interval for all outputs. You shouldn't set this below - ## interval. Maximum flush_interval will be flush_interval + flush_jitter - flush_interval = "10s" - ## Jitter the flush interval by a random amount. This is primarily to avoid - ## large write spikes for users running a large number of telegraf instances. - ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s - flush_jitter = "0s" - - ## By default, precision will be set to the same timestamp order as the - ## collection interval, with the maximum being 1s. - ## Precision will NOT be used for service inputs, such as logparser and statsd. - ## Valid values are "ns", "us" (or "µs"), "ms", "s". - precision = "" - - ## Logging configuration: - ## Run telegraf with debug log messages. - debug = true - ## Run telegraf in quiet mode (error log messages only). - quiet = false - ## Specify the log file name. The empty string means to log to stderr. - logfile = "" - - ## Override default hostname, if empty use os.Hostname() - hostname = "" - ## If set to true, do no set the "host" tag in the telegraf agent. 
- omit_hostname = false - - - ############################################################################### - # OUTPUT PLUGINS # - ############################################################################### - # Publish all metrics to /metrics for Prometheus to scrape - [[outputs.prometheus_client]] - ## Address to listen on. - listen = ":8096" - - ## Metric version controls the mapping from Telegraf metrics into - ## Prometheus format. When using the prometheus input, use the same value in - ## both plugins to ensure metrics are round-tripped without modification. - ## - ## example: metric_version = 1; deprecated in 1.13 - ## metric_version = 2; recommended version - # metric_version = 1 - - ## Use HTTP Basic Authentication. - basic_username = "$PROMETHEUS_USERNAME" - basic_password = "$PROMETHEUS_PASSWORD" - ## If set, the IP Ranges which are allowed to access metrics. - ## ex: ip_range = ["192.168.0.0/24", "192.168.1.0/30"] - # ip_range = [] - - ## Path to publish the metrics on. - # path = "/metrics" - - ## Expiration interval for each metric. 0 == no expiration - # expiration_interval = "60s" - - ## Collectors to enable, valid entries are "gocollector" and "process". - ## If unset, both are enabled. - # collectors_exclude = ["gocollector", "process"] - - ## Send string metrics as Prometheus labels. - ## Unless set to false all string metrics will be sent as labels. - # string_as_label = true - - ## If set, enable TLS with the given certificate. - # tls_cert = "/etc/ssl/telegraf.crt" - # tls_key = "/etc/ssl/telegraf.key" - - ## Set one or more allowed client CA certificate file names to - ## enable mutually authenticated TLS connections - # tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"] - - ## Export metric collection time. 
- # export_timestamp = false - ############################################################################### - # PROCESSOR PLUGINS # - ############################################################################### - - # # Print all metrics that pass through this filter. - # [[processors.printer]] - - - ############################################################################### - # AGGREGATOR PLUGINS # - ############################################################################### - - # # Keep the aggregate min/max of each metric passing through. - # [[aggregators.minmax]] - # ## General Aggregator Arguments: - # ## The period on which to flush & clear the aggregator. - # period = "30s" - # ## If true, the original metric will be dropped by the - # ## aggregator and will not get sent to the output plugins. - # drop_original = false - - - ############################################################################### - # INPUT PLUGINS # - ############################################################################### - - # Read metrics from one or more commands that can output to stdout - [[inputs.exec]] - ## Commands array - commands = [ - "python3 /opt/kafka-monitoring/exec-scripts/kafka_metric.py", - "/additional-metrics" - ] - - ## Timeout for each command to complete. - timeout = "$KAFKA_EXEC_PLUGIN_TIMEOUT" - - ## Data format to consume. 
- ## Each data format has it's own unique set of configuration options, read - ## more about them here: - ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md - data_format = "influx" - - ############################################################################### - # SERVICE INPUT PLUGINS # - ############################################################################### ---- -# Source: kafka-service/templates/operator/role.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: kafka-service-operator - labels: - app.kubernetes.io/version: '' - app.kubernetes.io/part-of: 'kafka-services' - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: 'backend' -rules: - - apiGroups: - - apps - resources: - - deployments - - replicasets - - statefulsets - - daemonsets - verbs: - - get - - create - - list - - update - - watch - - patch - - delete - - apiGroups: - - "" - resources: - - pods - - configmaps - - services - - persistentvolumeclaims - - secrets - - serviceaccounts - verbs: - - get - - create - - list - - update - - watch - - patch - - delete - - apiGroups: - - netcracker.com - resources: - - '*' - verbs: - - get - - list - - watch - - create - - update - - patch - - delete ---- -# Source: kafka-service/templates/operator/role_binding.yaml -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: kafka-service-operator - labels: - app.kubernetes.io/version: '' - app.kubernetes.io/part-of: 'kafka-services' - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: 'backend' -subjects: - - kind: ServiceAccount - name: kafka-service-operator -roleRef: - kind: Role - name: kafka-service-operator - apiGroup: rbac.authorization.k8s.io ---- -# Source: kafka-service/templates/operator/deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: kafka-service-operator - labels: - app.kubernetes.io/version: '' - app.kubernetes.io/part-of: 'kafka-services' - 
app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: 'backend' - app.kubernetes.io/name: kafka-service-operator - name: kafka-service-operator - app.kubernetes.io/instance: kafka-service-operator-default - app.kubernetes.io/technology: go -spec: - replicas: 1 - selector: - matchLabels: - name: kafka-service-operator - strategy: - type: Recreate - template: - metadata: - annotations: - kubectl.kubernetes.io/default-container: manager - labels: - name: kafka-service-operator - app.kubernetes.io/name: kafka-service-operator - app.kubernetes.io/technology: "go" - spec: - serviceAccountName: kafka-service-operator - containers: - - name: kafka-service-operator - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: ["ALL"] - image: ghcr.io/netcracker/qubership-kafka-service-operator:main - command: - - /manager - imagePullPolicy: Always - env: - - name: WATCH_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: KMM_ENABLED - value: "false" - - name: OPERATOR_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: OPERATOR_NAME - value: kafka-service-operator - - name: KMM_CONFIG_RECONCILE_PERIOD_SECONDS - value: "100" - - name: OPERATOR_MODE - value: "kafkaservice" - - name: CLUSTER_NAME - value: kafka - - name: API_GROUP - value: netcracker.com - resources: - requests: - memory: 512Mi - cpu: 100m - limits: - memory: 512Mi - cpu: 100m - livenessProbe: - httpGet: - path: /healthz - port: 8081 - scheme: HTTP - initialDelaySeconds: 30 - timeoutSeconds: 5 - periodSeconds: 15 - successThreshold: 1 - failureThreshold: 5 - readinessProbe: - httpGet: - path: /readyz - port: 8081 - scheme: HTTP - initialDelaySeconds: 40 - timeoutSeconds: 15 - periodSeconds: 15 - successThreshold: 1 - failureThreshold: 5 - securityContext: - runAsNonRoot: true - seccompProfile: - type: "RuntimeDefault" ---- -# Source: kafka-service/templates/akhq-ingress.yaml -# Ingress for AKHQ UI ---- -# Source: 
kafka-service/templates/grafana_dashboard.yaml -apiVersion: integreatly.org/v1alpha1 -kind: GrafanaDashboard -metadata: - name: kafka-grafana-dashboard - labels: - app.kubernetes.io/version: '' - app.kubernetes.io/part-of: 'kafka-services' - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: 'backend' - app: grafana -spec: - name: kafka-dashboard.json - json: > - { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Kafka Monitoring", - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 436, - "links": [ - { - "icon": "dashboard", - "tags": [], - "targetBlank": true, - "title": "Kubernetes / Pod Resources", - "type": "link", - "url": "/d/uaHNcPvMz/kubernetes-pod-resources?var-namespace=${namespace}" - }, - { - "icon": "dashboard", - "tags": [], - "targetBlank": true, - "title": "Kafka Lag Exporter", - "tooltip": "Open Kafka Lag Exporter dashboard (when corresponding Lag Exporter is enabled). 
", - "type": "link", - "url": "/d/8LW1Yd8ik/kafka-lag-exporter?var-namespace=${namespace}" - }, - { - "icon": "dashboard", - "tags": [], - "targetBlank": true, - "title": "Kafka Topics", - "tooltip": "Open Kafka Topics dashboard", - "type": "link", - "url": "/d/d16460083/kafka-topics?var-namespace=${namespace}" - }, - { - "icon": "dashboard", - "tags": [], - "targetBlank": true, - "title": "Node Details", - "tooltip": "Open Node Details dashboard for external managed Kafka node exporter.", - "type": "link", - "url": "/d/rYdddlPWk/node-details?var-job=kafka-node-exporter" - }, - { - "icon": "dashboard", - "tags": [], - "targetBlank": true, - "title": "Namespace Resources", - "tooltip": "Open Namespace Resources dashboard", - "type": "link", - "url": "/d/EEjW8rVGz/kubernetes-namespace-resources?var-namespace=${namespace}" - }, - { - "asDropdown": false, - "icon": "dashboard", - "includeVars": false, - "keepTime": false, - "tags": [], - "targetBlank": true, - "title": "Backup Daemon Dashboard", - "tooltip": "", - "type": "link", - "url": "/d/9EEzia24k/backup-daemon?var-namespace=${namespace}" - } - ], - "panels": [ - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 47, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "refId": "A" - } - ], - "title": "Cluster Overview", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Current status of Kafka cluster", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "0": { - "text": "UP" - }, - "6": { - "text": "DEGRADED" - }, - "10": { - "text": "DOWN" - } - }, - "type": "value" - }, - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - 
"color": "rgba(81, 167, 44, 0.9)", - "value": null - }, - { - "color": "rgba(199, 132, 42, 0.89)", - "value": 1 - }, - { - "color": "rgba(206, 36, 50, 0.97)", - "value": 7 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 5, - "x": 0, - "y": 1 - }, - "id": 33, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_cluster_status{namespace=\"$namespace\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Cluster Status", - "type": "stat" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Current size of Kafka cluster", - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "rgb(31, 120, 193)", - "mode": "fixed" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 5, - "y": 1 - }, - "id": 9, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - 
"datasource": { - "uid": "$datasource" - }, - "expr": "min(kafka_cluster_size{namespace=\"$namespace\", cluster=\"$cluster\"})", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Cluster Size", - "type": "stat" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Current count of ready pods. May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "rgb(31, 120, 193)", - "mode": "fixed" - }, - "mappings": [ - { - "options": { - "-1": { - "text": "N/A" - } - }, - "type": "value" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 8, - "y": 1 - }, - "id": 102, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kube_pod_status_ready{exported_namespace=\"$namespace\", cluster=\"$cluster\", exported_pod=~\"$broker.*\", condition=\"true\"}) OR on() vector(-1)", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Ready Pods", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Current state of controller broker. The controller changes state when processing an event. Most of the time, the Idle state is displayed, which means that the controller has processed all the events. 
If the event processing takes a considerable time (several seconds or more), this state of the controller will be displayed. May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "0": { - "index": 18, - "text": "Idle" - }, - "1": { - "index": 17, - "text": "Controller Change" - }, - "2": { - "index": 16, - "text": "Broker Change" - }, - "3": { - "index": 15, - "text": "Topic Change" - }, - "4": { - "index": 14, - "text": "Topic Deletion" - }, - "5": { - "index": 13, - "text": "Alter Partition Reassignment" - }, - "6": { - "index": 12, - "text": "Auto Leader Balance" - }, - "7": { - "index": 11, - "text": "Manual Leader Balance" - }, - "8": { - "index": 10, - "text": "Controlled Shutdown" - }, - "9": { - "index": 9, - "text": "Isr Change" - }, - "10": { - "index": 8, - "text": "Leader And Isr Response Received" - }, - "11": { - "index": 7, - "text": "Log Dir Change" - }, - "12": { - "index": 6, - "text": "Controller Shutdown" - }, - "13": { - "index": 5, - "text": "Unclean Leader Election Enable" - }, - "14": { - "index": 4, - "text": "Topic Unclean Leader Election Enable" - }, - "15": { - "index": 3, - "text": "List Partition Reassignment" - }, - "16": { - "index": 2, - "text": "Update Metadata Response Received" - }, - "101": { - "index": 19, - "text": "KRaft" - }, - "-1": { - "index": 1, - "text": "Cluster Fail" - } - }, - "type": "value" - }, - { - "options": { - "match": "null", - "result": { - "index": 0, - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgb(204, 204, 220)", - "value": null - }, - { - "color": "#1F60C4", - "value": 0 - }, - { - "color": "#FA6400", - "value": 1 - }, - { - "color": "dark-blue", - "value": 101 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 5, - "x": 11, - "y": 1 - }, - "id": 62, - "interval": "$inter", - 
"maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "max(kafka_controller_KafkaController_Value{name=\"ControllerState\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"})", - "instant": true, - "interval": "", - "legendFormat": "__auto", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "editorMode": "code", - "exemplar": false, - "expr": "kafka_cluster_quorum_mode{namespace=\"$namespace\"} == 101", - "format": "table", - "hide": false, - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "B" - } - ], - "title": "Controller State", - "type": "stat" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Current controller broker.\nThe first node to boot in a Kafka cluster automatically becomes the controller, and there can be only one. The controller in a Kafka cluster is responsible for maintaining the list of partition leaders, and coordinating leadership transitions (in the event a partition leader becomes unavailable). If it becomes necessary to replace the controller, a new controller is randomly chosen by ZooKeeper from the pool of brokers. 
In general, it is not possible for this value to be greater than one, but you should definitely alert on a value of zero", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "-1": { - "text": "Active controller is not elected" - } - }, - "type": "value" - }, - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 1 - }, - "id": 24, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "name", - "wideLayout": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_controller_KafkaController_Value{name=\"ActiveControllerCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"} > 0", - "instant": true, - "interval": "", - "legendFormat": "{{broker}}", - "refId": "A" - } - ], - "title": "Controller Broker", - "type": "stat" - }, - { - "datasource": { - "default": false, - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Kafka version running on controller broker", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "text", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 1 - }, - "id": 60, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "colorMode": 
"value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "name", - "wideLayout": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "group by (kafka_version) (kafka_cluster_status{namespace=\"$namespace\",cluster=\"$cluster\"})", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "{{kafka_version}}", - "refId": "B" - } - ], - "title": "Kafka Version", - "type": "stat" - }, - { - "datasource": { - "default": false, - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Transitions of Kafka cluster statuses", - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-GrYlRd" - }, - "custom": { - "fillOpacity": 100, - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineWidth": 0, - "spanNulls": false - }, - "fieldMinMax": false, - "mappings": [ - { - "options": { - "0": { - "color": "semi-dark-green", - "index": 5, - "text": "UP" - }, - "6": { - "color": "semi-dark-orange", - "index": 4, - "text": "DEGRADED" - }, - "10": { - "color": "semi-dark-red", - "index": 3, - "text": "DOWN" - }, - "N/A": { - "color": "#CCC", - "index": 2 - }, - "null": { - "index": 1, - "text": "N/A" - } - }, - "type": "value" - }, - { - "options": { - "from": null, - "result": { - "index": 0, - "text": "N/A" - }, - "to": null - }, - "type": "range" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 12, - "x": 0, - "y": 5 - }, - "id": 106, - "options": { - "alignValue": "center", - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "mergeValues": true, 
- "rowHeight": 0.9, - "showValue": "never", - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "exemplar": false, - "expr": "max(kafka_cluster_status{namespace=\"$namespace\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Cluster State", - "queryType": "randomWalk", - "range": true, - "refId": "A" - } - ], - "title": "Cluster Status Transitions", - "type": "state-timeline" - }, - { - "datasource": { - "default": false, - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Transitions of controller broker states. The controller changes state when processing an event. Most of the time, the Idle state is displayed, which means that the controller has processed all the events. If the event processing takes a considerable time (several seconds or more), this state of the controller will be displayed", - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-GrYlRd" - }, - "custom": { - "fillOpacity": 100, - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineWidth": 0, - "spanNulls": false - }, - "mappings": [ - { - "options": { - "0": { - "color": "dark-blue", - "index": 19, - "text": "Idle" - }, - "1": { - "color": "dark-orange", - "index": 18, - "text": "Controller Change" - }, - "2": { - "color": "dark-orange", - "index": 17, - "text": "Broker Change" - }, - "3": { - "color": "dark-orange", - "index": 16, - "text": "Topic Change" - }, - "4": { - "color": "dark-orange", - "index": 15, - "text": "Topic Deletion" - }, - "5": { - "color": "dark-orange", - "index": 14, - "text": "Alter Partition Reassignment" - }, - "6": { - "color": "dark-orange", - "index": 13, - "text": "Auto Leader Balance" - }, - "7": { - "color": "dark-orange", - "index": 12, - "text": "Manual Leader Balance" - }, - "8": { - "color": "dark-orange", - "index": 11, - "text": 
"Controlled Shutdown" - }, - "9": { - "color": "dark-orange", - "index": 10, - "text": "Isr Change" - }, - "10": { - "color": "dark-orange", - "index": 9, - "text": "Leader And Isr Response Received" - }, - "11": { - "color": "dark-orange", - "index": 8, - "text": "Log Dir Change" - }, - "12": { - "color": "dark-orange", - "index": 7, - "text": "Controller Shutdown" - }, - "13": { - "color": "dark-orange", - "index": 6, - "text": "Unclean Leader Election Enable" - }, - "14": { - "color": "dark-orange", - "index": 5, - "text": "Topic Unclean Leader Election Enable" - }, - "15": { - "color": "dark-orange", - "index": 4, - "text": "List Partition Reassignment" - }, - "16": { - "color": "dark-orange", - "index": 3, - "text": "Update Metadata Response Received" - }, - "-1": { - "color": "dark-red", - "index": 1, - "text": "Cluster Fail" - }, - "null": { - "color": "text", - "index": 2, - "text": "N/A" - } - }, - "type": "value" - }, - { - "options": { - "from": null, - "result": { - "color": "text", - "index": 0, - "text": "N/A" - }, - "to": null - }, - "type": "range" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 12, - "x": 12, - "y": 5 - }, - "id": 107, - "options": { - "alignValue": "left", - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "mergeValues": true, - "rowHeight": 0.9, - "showValue": "never", - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "max(kafka_controller_KafkaController_Value{name=\"ControllerState\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"})", - "interval": "", - "legendFormat": "Controller state", - "queryType": "randomWalk", - "range": true, - "refId": "A" 
- } - ], - "title": "Controller State Transitions", - "type": "state-timeline" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Transitions of readiness probes for each Kafka pod. May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-GrYlRd" - }, - "custom": { - "fillOpacity": 100, - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineWidth": 0, - "spanNulls": false - }, - "mappings": [ - { - "options": { - "0": { - "color": "semi-dark-red", - "index": 3, - "text": "FAILURE" - }, - "1": { - "color": "semi-dark-green", - "index": 2, - "text": "SUCCESS" - }, - "null": { - "color": "text", - "index": 1, - "text": "N/A" - } - }, - "type": "value" - }, - { - "options": { - "from": null, - "result": { - "color": "text", - "index": 0, - "text": "N/A" - }, - "to": null - }, - "type": "range" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 12, - "x": 0, - "y": 8 - }, - "id": 98, - "interval": "$inter", - "options": { - "alignValue": "left", - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "mergeValues": true, - "rowHeight": 0.9, - "showValue": "never", - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "min(kube_pod_status_ready{exported_namespace=\"$namespace\", cluster=\"$cluster\", exported_pod=~\"$broker.*\", condition=\"true\"}) by (exported_pod)", - "interval": "", - "legendFormat": "{{exported_pod}}", - "refId": "A" - } - ], - "title": "Pod Readiness Probe Transitions", - "type": "state-timeline" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Information about 
brokers configuration consistency", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 12, - "x": 12, - "y": 8 - }, - "id": 58, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "name", - "wideLayout": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_cluster_status{namespace=\"$namespace\", cluster=\"$cluster\"}", - "instant": true, - "interval": "", - "legendFormat": "{{same_configs}}", - "refId": "A" - } - ], - "title": "Similar Configs", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Information about broker states, number of partitions replicas on each broker, number of partitions where current broker is a leader, skews and number of under replicated partitions", - "fieldConfig": { - "defaults": { - "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "decimals": 0, - "displayName": "", - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Time" - }, - "properties": [ - { - "id": "displayName", - "value": "Time" - }, - { - "id": 
"custom.hidden", - "value": true - }, - { - "id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "broker" - }, - "properties": [ - { - "id": "displayName", - "value": "Broker" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #A" - }, - "properties": [ - { - "id": "displayName", - "value": "Broker State" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #B" - }, - "properties": [ - { - "id": "displayName", - "value": "Partitions" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.cellOptions", - "value": { - "type": "color-background" - } - }, - { - "id": "custom.align" - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 2000 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 4000 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #C" - }, - "properties": [ - { - "id": "displayName", - "value": "Broker Skew" - }, - { - "id": "unit", - "value": "percent" - }, - { - "id": "custom.cellOptions", - "value": { - "type": "color-background" - } - }, - { - "id": "custom.align" - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "#C4162A", - "value": null - }, - { - "color": "#56A64B", - "value": -50 - }, - { - "color": "#C4162A", - "value": 50 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #D" - }, - "properties": [ - { - "id": "displayName", - "value": "Partition Leadership" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #E" - }, - "properties": [ 
- { - "id": "displayName", - "value": "Broker Leader Skew" - }, - { - "id": "unit", - "value": "percent" - }, - { - "id": "custom.cellOptions", - "value": { - "type": "color-background" - } - }, - { - "id": "custom.align" - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "#C4162A", - "value": null - }, - { - "color": "#56A64B", - "value": -50 - }, - { - "color": "#C4162A", - "value": 50 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #F" - }, - "properties": [ - { - "id": "displayName", - "value": "Under Replicated Partitions" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.cellOptions", - "value": { - "type": "color-background" - } - }, - { - "id": "custom.align" - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "#FA6400", - "value": null - }, - { - "color": "#56A64B", - "value": 0 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 1 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #G" - }, - "properties": [ - { - "id": "displayName", - "value": "Under Min ISR Partitions" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.cellOptions", - "value": { - "type": "color-background" - } - }, - { - "id": "custom.align" - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "#FA6400", - "value": null - }, - { - "color": "#56A64B", - "value": 0 - }, - { - "color": "#C4162A", - "value": 1 - } - ] - } - } - ] - } - ] - }, - "gridPos": { - "h": 5, - "w": 24, - "x": 0, - "y": 13 - }, - "id": 66, - "interval": "$inter", - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_server_KafkaServer_Value{name=\"BrokerState\", 
namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "max(kafka_server_ReplicaManager_Value{name=\"PartitionCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "B" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_broker_skew{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "C" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_server_ReplicaManager_Value{name=\"LeaderCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "D" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_broker_leader_skew{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "E" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_server_ReplicaManager_Value{name=\"UnderReplicatedPartitions\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "F" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_server_ReplicaManager_Value{name=\"UnderMinIsrPartitionCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "G" - } 
- ], - "timeFrom": "5m", - "title": "Brokers", - "transformations": [ - { - "id": "merge", - "options": { - "reducers": [] - } - } - ], - "type": "table" - }, - { - "collapsed": false, - "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 164, - "panels": [], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Broker Issues", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "The number of under-replicated partitions. Replicas that are added as part of a reassignment will not count toward this value", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 19 - }, - "id": 168, - "options": { - "legend": { - "calcs": [ - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"kafka_server_ReplicaManager_Value{name=\"UnderReplicatedPartitions\",namespace=\"$namespace\",cluster=\"$cluster\",broker=~\"$broker\"}", - "legendFormat": "{{ broker }}", - "range": true, - "refId": "A" - } - ], - "title": "Under Replicated Partitions", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "The number of partitions whose in-sync replicas (ISR) count is less than `minIsr`", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 19 - }, - "id": 169, - "options": { - "legend": { - "calcs": [ - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_server_ReplicaManager_Value{name=\"UnderMinIsrPartitionCount\",namespace=\"$namespace\",cluster=\"$cluster\",broker=~\"$broker\"}", - "legendFormat": "{{ broker }}", - "range": true, - "refId": "A" - } - ], - "title": "Under Min ISR Partitions", - 
"type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "The number of partitions that don’t have an active leader and are hence not writable or readable", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 26 - }, - "id": 170, - "options": { - "legend": { - "calcs": [ - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "kafka_controller_KafkaController_Value{name=\"OfflinePartitionsCount\",namespace=\"$namespace\",cluster=\"$cluster\",broker=~\"$broker\"}", - "legendFormat": "{{ broker }}", - "range": true, - "refId": "A" - } - ], - "title": "Offline Partitions", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "The number of partitions which have open transactions with durations exceeding `transaction.max.timeout.ms` (plus 5 minutes)", - 
"fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 26 - }, - "id": 172, - "options": { - "legend": { - "calcs": [ - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "kafka_server_ReplicaManager_Value{name=\"PartitionsWithLateTransactionsCount\",namespace=\"$namespace\",cluster=\"$cluster\",broker=~\"$broker\"}", - "legendFormat": "{{ broker }}", - "range": true, - "refId": "A" - } - ], - "title": "Partitions With Late Transactions", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "The rate of errors in responses counted per error code. 
If a response contains multiple errors, all are counted", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 171, - "options": { - "legend": { - "calcs": [ - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true, - "width": 390 - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(rate(kafka_network_RequestMetrics_Count_total{name=\"ErrorsPerSec\",error!=\"NONE\",namespace=\"$namespace\",cluster=\"$cluster\",broker=~\"$broker\"}[5m])) by (broker, error)", - "legendFormat": "{{ broker }}: {{error}}", - "range": true, - "refId": "A" - } - ], - "title": "Broker Errors Rate", - "type": "timeseries" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 40 - }, - "id": 64, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": 
"$datasource" - }, - "description": "Total number of topics on Kafka cluster. May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "-1": { - "text": "Cluster Fail" - } - }, - "type": "value" - }, - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 0, - "y": 41 - }, - "id": 68, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "max(kafka_controller_KafkaController_Value{name=\"GlobalTopicCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Total Topics Count", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Total number of partitions on Kafka cluster. 
May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "-1": { - "text": "Cluster Fail" - } - }, - "type": "value" - }, - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(143, 59, 184, 0)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 100000 - }, - { - "color": "#d44a3a", - "value": 200000 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 8, - "y": 41 - }, - "id": 70, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "max(kafka_controller_KafkaController_Value{name=\"GlobalPartitionCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Total Partitions Count", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Number of max partitions per broker. 
May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "-1": { - "text": "Cluster Fail" - } - }, - "type": "value" - }, - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(143, 59, 184, 0)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 100000 - }, - { - "color": "#d44a3a", - "value": 200000 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 16, - "y": 41 - }, - "id": 173, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "max(max(kafka_server_ReplicaManager_Value{name=\"PartitionCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker))", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Max Partitions Per Broker", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "List of topics which have at least one partition without a leader. 
May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 46 - }, - "id": 74, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "value", - "wideLayout": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "count(kafka_cluster_topic_without_leader{namespace=\"$namespace\", cluster=\"$cluster\"}) OR on() vector(0)", - "format": "time_series", - "instant": true, - "legendFormat": "{{topics_without_leader}}", - "refId": "A" - } - ], - "title": "Topics Without Leader", - "type": "stat" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Number of partitions without an active leader. Because all read and write operations are only performed on partition leaders, a non-zero value for this metric should be alerted on to prevent service interruptions. 
Any partition without an active leader will be completely inaccessible, and both consumers and producers of that partition will be blocked until a leader becomes available", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "-1": { - "text": "Kafka controller is not elected" - } - }, - "type": "value" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#FA6400", - "value": null - }, - { - "color": "rgba(86, 166, 75, 0)", - "value": 0 - }, - { - "color": "#E02F44", - "value": 1 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 46 - }, - "id": 26, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "text": {}, - "textMode": "value", - "wideLayout": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_controller_KafkaController_Value{name=\"OfflinePartitionsCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Offline Partitions Count", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Count of topics which have at least one under replicated partition. 
May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "orange", - "value": null - }, - { - "color": "transparent", - "value": 0 - }, - { - "color": "orange", - "value": 1 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 46 - }, - "id": 76, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "value", - "wideLayout": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "count(group(kafka_cluster_Partition_Value{name=\"UnderReplicated\",namespace=\"$namespace\",cluster=\"$cluster\"} > 0) by (topic)) OR on() vector(0)", - "instant": true, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Under Replicated Topics", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Count of topics with unclean leader election enabled. Unclean leader elections can lead to data loss, so you should check these topics to see if this setting is set reasonably. 
May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "links": [ - { - "targetBlank": true, - "title": "", - "url": "/d/d16460083/kafka-topics?var-namespace=${namespace}&viewPanel=67" - } - ], - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "orange", - "value": null - }, - { - "color": "transparent", - "value": 0 - }, - { - "color": "orange", - "value": 1 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 46 - }, - "id": 104, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "colorMode": "background", - "graphMode": "auto", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "count(kafka_cluster_unclean_election_topics{namespace=\"$namespace\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Topics With Unclean Leader Election Enabled", - "type": "stat" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "refId": "A" - } - ], - "title": "Topics", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 41 - }, - "id": 78, - "panels": [ - { - "datasource": { - "uid": "$datasource" - }, - "description": "Number of consumer groups in different states. 
May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "custom": { - "align": "right", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "decimals": 2, - "displayName": "", - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Time" - }, - "properties": [ - { - "id": "displayName", - "value": "Time" - }, - { - "id": "custom.hidden", - "value": true - }, - { - "id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "namespace" - }, - "properties": [ - { - "id": "unit", - "value": "short" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.hidden", - "value": true - }, - { - "id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #A" - }, - "properties": [ - { - "id": "displayName", - "value": "Total" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #B" - }, - "properties": [ - { - "id": "displayName", - "value": "Stable" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #C" - }, - "properties": [ - { - "id": "displayName", - "value": "PreparingRebalance" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #D" - }, - "properties": [ - { - "id": "displayName", - "value": "CompletingRebalance" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #E" - }, - "properties": [ - { - "id": "displayName", - "value": "Empty" - }, - { - "id": "unit", - "value": "short" - }, - { - 
"id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #F" - }, - "properties": [ - { - "id": "displayName", - "value": "Dead" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.align" - } - ] - } - ] - }, - "gridPos": { - "h": 3, - "w": 24, - "x": 0, - "y": 42 - }, - "id": 80, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_coordinator_group_GroupMetadataManager_Value{name=\"NumGroups\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (namespace)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_coordinator_group_GroupMetadataManager_Value{name=\"NumGroupsStable\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (namespace)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "B" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_coordinator_group_GroupMetadataManager_Value{name=\"NumGroupsPreparingRebalance\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (namespace)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "C" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_coordinator_group_GroupMetadataManager_Value{name=\"NumGroupsCompletingRebalance\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (namespace)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "D" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"max(kafka_coordinator_group_GroupMetadataManager_Value{name=\"NumGroupsEmpty\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (namespace)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "E" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_coordinator_group_GroupMetadataManager_Value{name=\"NumGroupsDead\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (namespace)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "F" - } - ], - "title": "Consumer Groups Number", - "transformations": [ - { - "id": "merge", - "options": { - "reducers": [] - } - } - ], - "type": "table" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "refId": "A" - } - ], - "title": "Consumer Groups", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 42 - }, - "id": 48, - "panels": [ - { - "datasource": { - "uid": "$datasource" - }, - "description": "JVM heap usage by broker. 
May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/limit/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C4162A", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineWidth", - "value": 2 - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 43 - }, - "id": 1, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(java_Memory_HeapMemoryUsage_used{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", - "interval": "", - "legendFormat": "{{broker}}: used", - "refId": "D" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(java_Memory_HeapMemoryUsage_max{namespace=\"$namespace\", 
cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", - "interval": "", - "legendFormat": "{{broker}}: limit", - "refId": "C" - } - ], - "title": "JVM Heap Usage", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "JVM heap usage by broker in percent (%). May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 43 - }, - "id": 21, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "(max(java_Memory_HeapMemoryUsage_used{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker) / max(java_Memory_HeapMemoryUsage_max{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)) * 100", - "interval": "", - 
"legendFormat": "{{broker}}", - "refId": "A" - } - ], - "title": "JVM Heap Usage in Percent", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Garbage collection time rate per second by broker. May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 50 - }, - "id": 23, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(rate(java_GarbageCollector_CollectionTime_total{name=\"G1 Young Generation\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval])) by (broker) >= 0", - "interval": "", - "legendFormat": "{{broker}}: young", - "refId": "A" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"max(rate(java_GarbageCollector_CollectionTime_total{name=\"G1 Old Generation\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval])) by (broker) >= 0", - "interval": "", - "legendFormat": "{{broker}}: old", - "refId": "B" - } - ], - "title": "GC Time", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Garbage collections count rate per second by broker. May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 50 - }, - "id": 82, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(rate(java_GarbageCollector_CollectionCount_total{name=\"G1 Young Generation\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval])) by 
(broker) >=0", - "interval": "", - "legendFormat": "{{broker}}: young", - "refId": "A" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(rate(java_GarbageCollector_CollectionCount_total{name=\"G1 Old Generation\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval])) by (broker) >=0", - "interval": "", - "legendFormat": "{{broker}}: old", - "refId": "B" - } - ], - "title": "GC Count", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "refId": "A" - } - ], - "title": "JVM Heap and GC", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 43 - }, - "id": 49, - "panels": [ - { - "datasource": { - "uid": "$datasource" - }, - "description": "Memory usage by pod. May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/limit/" - }, - "properties": [ - { - "id": "color", 
- "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineWidth", - "value": 3 - } - ] - } - ] - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 44 - }, - "id": 44, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(container_memory_usage_bytes{namespace=\"$namespace\", cluster=\"$cluster\", pod=~\"$broker.*\", container!=\"\"}) by (pod)", - "legendFormat": "{{pod}}: allocated", - "refId": "A" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", cluster=\"$cluster\", pod=~\"$broker.*\", container!=\"\", image!=\"\"}) by (pod)", - "legendFormat": "{{pod}}: usage", - "refId": "B" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{exported_namespace=\"$namespace\", cluster=\"$cluster\", exported_pod=~\"$broker.*\"}) by (exported_pod)", - "legendFormat": "{{exported_pod}}: limit", - "refId": "C" - } - ], - "title": "Memory Usage", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "refId": "A" - } - ], - "title": "RAM", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 44 - }, - "id": 50, - "panels": [ - { - "datasource": { - "uid": "$datasource" - }, - "description": "CPU usage by pod. 
May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Millicores", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/limit/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineWidth", - "value": 4 - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/request/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineWidth", - "value": 1 - } - ] - } - ] - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 45 - }, - "id": 46, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"(sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", cluster=\"$cluster\", pod=~\"$broker.*\", container!=\"\", image!=\"\"}[$__interval])) by (pod)) * 1000", - "legendFormat": "{{pod}}: usage", - "refId": "A" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "(sum(kube_pod_container_resource_limits_cpu_cores{exported_namespace=\"$namespace\", cluster=\"$cluster\", exported_pod=~\"$broker.*\"}) by (exported_pod)) * 1000", - "legendFormat": "{{exported_pod}}: limit", - "refId": "C" - } - ], - "title": "CPU Usage", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "refId": "A" - } - ], - "title": "CPU", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 45 - }, - "id": 51, - "panels": [ - { - "datasource": { - "uid": "$datasource" - }, - "description": "The total size in bytes of read operations on the volume for each broker. 
May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 46 - }, - "id": 31, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "rate(kafka_server_KafkaServer_Value{name=\"linux-disk-read-bytes\",namespace=\"${namespace}\",cluster=\"$cluster\",broker=~\"$broker\"}[5m])", - "legendFormat": "{{ broker }}", - "refId": "A" - } - ], - "title": "Disk Read Bytes", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "The total size in bytes of write operations on the volume for each broker. 
May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 46 - }, - "id": 32, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "rate(kafka_server_KafkaServer_Value{name=\"linux-disk-write-bytes\",namespace=\"${namespace}\",cluster=\"$cluster\",broker=~\"$broker\"}[5m])", - "legendFormat": "{{ broker }}", - "refId": "A" - } - ], - "title": "Disk Write Bytes", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "The total size in bytes of volume space occupied by topic partitions logs for each broker. 
May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 54 - }, - "id": 34, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_log_Log_Value{name=\"Size\",namespace=\"${namespace}\",cluster=\"$cluster\",broker=~\"$broker\"}) by (broker)", - "legendFormat": "{{ broker }}", - "refId": "A" - } - ], - "title": "Topic Partition Data Size", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "refId": "A" - } - ], - "title": "Disk", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 46 - }, - "id": 84, - "panels": [ 
- { - "datasource": { - "uid": "$datasource" - }, - "description": "Total number of controller requests to be sent out to brokers", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 47 - }, - "id": 86, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_controller_ControllerChannelManager_Value{name=\"TotalQueueSize\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) >= 0", - "interval": "", - "legendFormat": "total_queue_size", - "refId": "A" - } - ], - "title": "Total Queue Size", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Size of ControllerEventManager's queue. Every ControllerEvent has an associated state. 
When a ControllerEvent is processed, it triggers a state transition to the requested state. ControllerEvent events are managed by ControllerEventManager", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 47 - }, - "id": 88, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_controller_ControllerEventManager_Value{name=\"EventQueueSize\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) >= 0", - "interval": "", - "legendFormat": "event_queue_size", - "refId": "A" - } - ], - "title": "Event Queue Size", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Time it takes for any event (except the Idle event) to wait in the ControllerEventManager's queue before being processed", - 
"fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 47 - }, - "id": 90, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(idelta(kafka_controller_ControllerEventManager_Count_total{name=\"EventQueueTimeMs\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval]))", - "interval": "", - "legendFormat": "event_queue_time_rate", - "refId": "A" - } - ], - "title": "Event Queue Time Rate", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "refId": "A" - } - ], - "title": "Controller Queues", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 
47 - }, - "id": 52, - "panels": [ - { - "datasource": { - "uid": "$datasource" - }, - "description": "Aggregate incoming/outgoing byte rate per second", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/in/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#3F6833", - "mode": "fixed" - } - }, - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/out/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E5AC0E", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 48 - }, - "id": 15, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"BytesInPerSec\", 
namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[5m])) by (broker) >= 0", - "interval": "", - "legendFormat": "{{broker}}: in", - "refId": "A" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"BytesOutPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[5m])) by (broker) >= 0", - "interval": "", - "legendFormat": "{{broker}}: out", - "refId": "B" - } - ], - "title": "Bytes In/Out", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Amount of data in bytes per second rejected by broker", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 48 - }, - "id": 17, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"BytesRejectedPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[5m])) by (broker) >= 0", - "interval": "", - "legendFormat": "{{broker}}", - "refId": "A" - } - ], - "title": "Bytes Rejected", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "refId": "A" - } - ], - "title": "Network", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 48 - }, - "id": 92, - "panels": [ - { - "datasource": { - "uid": "$datasource" - }, - "description": "ZooKeeper session state for each broker. May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "custom": { - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "decimals": 2, - "displayName": "", - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "broker" - }, - "properties": [ - { - "id": "displayName", - "value": "Broker" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "value" - }, - "properties": [ - { - "id": "displayName", - "value": "Zookeeper Session State" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.align" - } - ] - } - ] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 0, - "y": 49 - }, - "id": 94, - "interval": "$inter", - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"kafka_server_SessionExpireListener_Value{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": "5m", - "title": "ZooKeeper Session State", - "transformations": [ - { - "id": "merge", - "options": { - "reducers": [] - } - } - ], - "type": "table" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Rate of ZooKeeper requests. May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 8, - "y": 49 - }, - "id": 96, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.5.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "rate(kafka_server_ZooKeeperClientMetrics_Count_total{name=\"ZooKeeperRequestLatencyMs\", 
namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[5m])", - "interval": "", - "legendFormat": "{{broker}}", - "range": true, - "refId": "A" - } - ], - "title": "ZooKeeper Requests Rate", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Latency of ZooKeeper requests. May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 16, - "y": 49 - }, - "id": 108, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.5.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "kafka_server_ZooKeeperClientMetrics_95thPercentile{name=\"ZooKeeperRequestLatencyMs\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}", - "interval": "", - "legendFormat": "{{broker}}", - "range": true, - "refId": "A" - } - 
], - "title": "ZooKeeper Requests Latency", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "ZooKeeper Connection", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 49 - }, - "id": 53, - "panels": [ - { - "datasource": { - "uid": "$datasource" - }, - "description": "ISR expansion rate. If a broker goes down, ISR for some of the partitions will shrink. When that broker is up again, ISR will be expanded once the replicas are fully caught up. Other than that, the expected value for both ISR shrink rate and expansion rate is 0", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 50 - }, - "id": 12, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": 
"7.3.6", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(idelta(kafka_server_ReplicaManager_Count_total{name=\"IsrExpandsPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval])) by (broker) >= 0", - "interval": "", - "legendFormat": "{{broker}}", - "refId": "A" - } - ], - "title": "Isr Expands Rate", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "ISR shrink rate. If a broker goes down, ISR for some of the partitions will shrink. When that broker is up again, ISR will be expanded once the replicas are fully caught up. Other than that, the expected value for both ISR shrink rate and expansion rate is 0", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 50 - }, - "id": 13, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "7.3.6", - 
"targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(idelta(kafka_server_ReplicaManager_Count_total{name=\"IsrShrinksPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval])) by (broker) >= 0", - "interval": "", - "legendFormat": "{{broker}}", - "refId": "A" - } - ], - "title": "Isr Shrinks Rate", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "refId": "A" - } - ], - "title": "In Sync Replica (ISR)", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 50 - }, - "id": 54, - "panels": [ - { - "datasource": { - "uid": "$datasource" - }, - "description": "Aggregate incoming messages rate per second", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 51 - }, - "id": 4, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - 
"displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"MessagesInPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[5m])) by (broker)", - "interval": "", - "legendFormat": "{{broker}}", - "refId": "A" - } - ], - "title": "Message Rate", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "refId": "A" - } - ], - "title": "Messages", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 51 - }, - "id": 55, - "panels": [ - { - "datasource": { - "uid": "$datasource" - }, - "description": "Total fetch request rate per second", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 52 - 
}, - "id": 27, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(abs(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"TotalFetchRequestsPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[5m]))) by (broker)", - "interval": "", - "legendFormat": "{{broker}}", - "refId": "A" - } - ], - "title": "Total Fetch Requests Rate", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Total produce request rate per second", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 52 - }, - "id": 18, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - 
"sort": "none" - } - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(abs(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"TotalProduceRequestsPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\",}[5m]))) by (broker)", - "interval": "", - "legendFormat": "{{broker}}", - "refId": "A" - } - ], - "title": "Total Produce Requests Rate", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Fetch request rate per second for requests that failed", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 59 - }, - "id": 11, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"FailedFetchRequestsPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[5m])) by (broker)", - "interval": "", - "legendFormat": "{{broker}}", - "refId": "A" - } - ], - "title": "Failed Fetch Requests Rate", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Produce request rate per second for requests that failed", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 59 - }, - "id": 28, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"FailedProduceRequestsPerSec\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[5m])) by (broker)", - "interval": 
"", - "legendFormat": "{{broker}}", - "refId": "A" - } - ], - "title": "Failed Produce Requests Rate", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Size of fetch queue. May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 66 - }, - "id": 30, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(idelta(kafka_server_Fetch_queue_size{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval])) by (broker)", - "interval": "", - "legendFormat": "{{broker}}", - "refId": "A" - } - ], - "title": "Fetch Queue Size", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Size of request queue. 
A congested request queue will not be able to process incoming or outgoing requests", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 66 - }, - "id": 29, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(idelta(kafka_network_RequestChannel_Value{name=\"RequestQueueSize\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}[$__interval])) by (broker)", - "interval": "", - "legendFormat": "{{broker}}", - "refId": "A" - } - ], - "title": "Request Queue Size", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "refId": "A" - } - ], - "title": "Requests", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": 
"PC3E95692D54ABCC0" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 52 - }, - "id": 56, - "panels": [ - { - "datasource": { - "uid": "$datasource" - }, - "description": "Number of live threads, including both daemon and non-daemon threads. May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 53 - }, - "id": 3, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(java_Threading_ThreadCount{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", - "interval": "", - "legendFormat": "{{broker}}", - "refId": "A" - } - ], - "title": "Thread Count", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Total number of threads created 
and also started since the JVM started. May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 53 - }, - "id": 14, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(java_Threading_TotalStartedThreadCount{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", - "interval": "", - "legendFormat": "{{broker}}", - "refId": "A" - } - ], - "title": "Total Started Thread Count", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Information about configured and dead Log Cleaner and Replica Fetcher threads. 
May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Time" - }, - "properties": [ - { - "id": "displayName", - "value": "Time" - }, - { - "id": "custom.hidden", - "value": true - }, - { - "id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "broker" - }, - "properties": [ - { - "id": "displayName", - "value": "Broker" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #A" - }, - "properties": [ - { - "id": "displayName", - "value": "Log Cleaner Threads Count" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #B" - }, - "properties": [ - { - "id": "displayName", - "value": "Log Cleaner Dead Threads Count" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #C" - }, - "properties": [ - { - "id": "displayName", - "value": "Replica Fetcher Threads Count" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.align" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #D" - }, - "properties": [ - { - "id": "displayName", - "value": "Replica Fetcher Dead Threads Count" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.align" - } - ] - } - ] - }, - "gridPos": { - "h": 5, - "w": 24, - "x": 0, - "y": 59 - }, - "id": 100, - "interval": "$inter", - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - 
"sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "11.2.1", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_log_cleaner_threads_count{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_log_LogCleaner_Value{name=\"DeadThreadCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "B" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_replica_fetcher_threads_count{namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "C" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(kafka_server_ReplicaFetcherManager_Value{name=\"DeadThreadCount\", namespace=\"$namespace\", cluster=\"$cluster\", broker=~\"$broker\"}) by (broker)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "D" - } - ], - "timeFrom": "5m", - "title": "Log Cleaner And Replica Fetcher Threads", - "transformations": [ - { - "id": "merge", - "options": { - "reducers": [] - } - } - ], - "type": "table" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PC3E95692D54ABCC0" - }, - "refId": "A" - } - ], - "title": "Threads", - "type": "row" - } - ], - "refresh": "10s", - "schemaVersion": 39, - "tags": [ - "kafka", - "prometheus" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "Platform Monitoring Prometheus", - "value": "PC3E95692D54ABCC0" - }, - "hide": 0, - "includeAll": false, - "label": "Cloud", - "multi": false, - "name": "datasource", - "options": [], - 
"query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "current": { - "isNone": true, - "selected": false, - "text": "None", - "value": "" - }, - "datasource": { - "uid": "$datasource" - }, - "definition": "", - "hide": 0, - "includeAll": false, - "label": "Cluster", - "multi": false, - "name": "cluster", - "options": [], - "query": { - "query": "label_values(kafka_controller_KafkaController_Value, cluster)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - }, - { - "current": { - "selected": true, - "text": "kafka-service", - "value": "kafka-service" - }, - "datasource": { - "uid": "$datasource" - }, - "definition": "", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [], - "query": "label_values(kafka_controller_KafkaController_Value{cluster=\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": { - "uid": "$datasource" - }, - "definition": "", - "hide": 0, - "includeAll": true, - "label": "Broker", - "multi": true, - "name": "broker", - "options": [], - "query": "label_values(kafka_controller_KafkaController_Value{namespace=\"$namespace\", cluster=\"$cluster\"}, broker)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "auto": true, - "auto_count": 30, - "auto_min": "120s", - "current": { - "selected": false, - "text": "auto", - "value": "$__auto_interval_inter" - }, - "hide": 0, - "label": "Sampling", - "name": "inter", - "options": [ - { - "selected": true, - "text": "auto", - "value": "$__auto_interval_inter" - }, - 
{ - "selected": false, - "text": "1m", - "value": "1m" - }, - { - "selected": false, - "text": "10m", - "value": "10m" - }, - { - "selected": false, - "text": "30m", - "value": "30m" - }, - { - "selected": false, - "text": "1h", - "value": "1h" - }, - { - "selected": false, - "text": "6h", - "value": "6h" - }, - { - "selected": false, - "text": "12h", - "value": "12h" - }, - { - "selected": false, - "text": "1d", - "value": "1d" - }, - { - "selected": false, - "text": "7d", - "value": "7d" - }, - { - "selected": false, - "text": "14d", - "value": "14d" - }, - { - "selected": false, - "text": "30d", - "value": "30d" - } - ], - "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", - "refresh": 2, - "skipUrlSync": false, - "type": "interval" - } - ] - }, - "time": { - "from": "now-3h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kafka Monitoring", - "uid": "e31fe3150b0d4da69b5147cdf7f60ded0a6a0445", - "version": 4, - "weekStart": "" - } ---- -# Source: kafka-service/templates/kafka-topics-dashboard.yaml -apiVersion: integreatly.org/v1alpha1 -kind: GrafanaDashboard -metadata: - name: kafka-topics-grafana-dashboard - labels: - app.kubernetes.io/version: '' - app.kubernetes.io/part-of: 'kafka-services' - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: 'backend' - app: grafana -spec: - name: kafka-topics-dashboard.json - json: > - { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "description": "Kafka Topics", - "editable": 
true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 389, - "links": [], - "panels": [ - { - "collapsed": true, - "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 3, - "panels": [ - { - "datasource": { - "uid": "$datasource" - }, - "description": "The count of partitions, number of messages and size in bytes for each topic in descending order of size values", - "fieldConfig": { - "defaults": { - "color": { - "mode": "fixed" - }, - "custom": { - "align": "center", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Size" - }, - "properties": [ - { - "id": "unit", - "value": "bytes" - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 1 - }, - "id": 20, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true, - "sortBy": [] - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(sum(kafka_log_Log_Value{name=\"LogEndOffset\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, broker) - sum(kafka_log_Log_Value{name=\"LogStartOffset\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, broker)) by (topic)", - "format": "table", - "instant": true, - "range": false, - "refId": "A" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(sum(kafka_log_Log_Value{name=\"Size\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, broker)) by (topic)", - "format": "table", - "instant": true, - "range": false, - "refId": "B" - }, - { - "datasource": { - "uid": "$datasource" - 
}, - "expr": "max(count(kafka_log_Log_Value{name=\"Size\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, broker)) by (topic)", - "format": "table", - "instant": true, - "range": false, - "refId": "C" - } - ], - "title": "Topics", - "transformations": [ - { - "id": "merge", - "options": { - - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "Time": 0, - "Value #A": 3, - "Value #B": 4, - "Value #C": 2, - "topic": 1 - }, - "renameByName": { - "Value #A": "Number of Messages", - "Value #B": "Size", - "Value #C": "Partitions Count", - "topic": "Topic Name" - } - } - }, - { - "id": "sortBy", - "options": { - "fields": { - - }, - "sort": [ - { - "desc": true, - "field": "Size" - } - ] - } - } - ], - "type": "table" - } - ], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Overview", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 1 - }, - "id": 61, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "The list of topic partitions whose in-sync replicas (ISR) count is less than `minIsr` for each broker", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 2 - }, - "id": 66, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - 
}, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(kafka_cluster_Partition_Value{name=\"UnderMinIsr\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, partition) == 1", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "Under Min ISR Partitions Table", - "transformations": [ - { - "id": "merge", - "options": { - - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "Value": true - }, - "includeByName": { - - }, - "indexByName": { - "Time": 0, - "Value": 4, - "broker": 2, - "partition": 3, - "topic": 1 - }, - "renameByName": { - "Time": "", - "broker": "Broker", - "partition": "Partition", - "topic": "Topic" - } - } - } - ], - "type": "table" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "The number of topic under-replicated partitions for each broker", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "center", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 2 - }, - "id": 65, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "frameIndex": 1, - "showHeader": true - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "exemplar": false, - "expr": 
"sum(kafka_cluster_Partition_Value{name=\"UnderReplicated\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, partition) == 1", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "Under Replicated Partitions Table", - "transformations": [ - { - "id": "merge", - "options": { - - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "Value": true - }, - "includeByName": { - - }, - "indexByName": { - "Time": 0, - "Value": 4, - "broker": 2, - "partition": 3, - "topic": 1 - }, - "renameByName": { - "Time": "", - "broker": "Broker", - "partition": "Partition", - "topic": "Topic" - } - } - } - ], - "type": "table" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "List of topics with unclean leader election enabled. Unclean leader elections can lead to data loss, so you should check these topics to see if this setting is set reasonably. 
May be not applicable for managed external Kafka.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "center", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 2 - }, - "id": 67, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "frameIndex": 1, - "showHeader": true - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(kafka_cluster_unclean_election_topics{namespace=\"$namespace\", topic=~\"$topic\"}) by (topic)", - "format": "table", - "instant": true, - "legendFormat": "{{ topic }} - {{ broker }}", - "range": false, - "refId": "A" - } - ], - "title": "Unclean Election Leader Topics Table", - "transformations": [ - { - "id": "merge", - "options": { - - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "Value": true - }, - "includeByName": { - - }, - "indexByName": { - - }, - "renameByName": { - "Time": "", - "topic": "Topic" - } - } - } - ], - "type": "table" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "The total size in bytes of volume space occupied by specific topic for each broker", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": 
false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 31, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "legend": { - "calcs": [ - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Mean", - "sortDesc": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.5.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_log_Log_Value{name=\"Size\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, broker)", - "format": "time_series", - "legendFormat": "{{ topic }} - {{ broker }}", - "refId": "A" - } - ], - "title": "Partition Data Size", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "The number of topic partitions whose in-sync replicas (ISR) count is less than `minIsr` for each broker", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - 
"scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 17 - }, - "id": 63, - "interval": "$inter", - "maxDataPoints": 100, - "options": { - "legend": { - "calcs": [ - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "9.5.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_cluster_Partition_Value{name=\"UnderMinIsr\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, broker)", - "format": "time_series", - "legendFormat": "{{ topic }} - {{ broker }}", - "range": true, - "refId": "A" - } - ], - "title": "Under Min ISR Partitions", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Topic Issues", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 2 - }, - "id": 23, - "panels": [ - { - "datasource": { - "uid": "$datasource" - }, - "description": "The incoming messages rate by specific topic for each broker. 
`No Data` for specific topic means that there are no operations performed on the topic", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 3 - }, - "id": 40, - "interval": "$inter", - "options": { - "legend": { - "calcs": [ - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Mean", - "sortDesc": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.5.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"MessagesInPerSec\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}[5m])) by (topic, broker)", - "format": "time_series", - "legendFormat": "{{ topic }} - {{ broker }}", - "range": true, - "refId": "A" - } - ], - "title": "Incoming Messages Rate", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "The incoming bytes rate by specific topic for each broker. 
`No Data` for specific topic means that there are no operations performed on the topic", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 11 - }, - "id": 50, - "interval": "$inter", - "links": [], - "maxDataPoints": 100, - "options": { - "legend": { - "calcs": [ - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Mean", - "sortDesc": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.5.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"BytesInPerSec\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}[5m])) by (topic, broker)", - "format": "time_series", - "legendFormat": "{{ topic }} - {{ broker }}", - "refId": "A" - } - ], - "title": "Incoming Bytes Rate", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "The outgoing bytes rate by specific topic for each broker. 
`No Data` for specific topic means that there are no operations performed on the topic", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 19 - }, - "id": 60, - "interval": "$inter", - "links": [], - "maxDataPoints": 100, - "options": { - "legend": { - "calcs": [ - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Mean", - "sortDesc": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.5.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "max(rate(kafka_server_BrokerTopicMetrics_Count_total{name=\"BytesOutPerSec\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}[5m])) by (topic, broker)", - "format": "time_series", - "legendFormat": "{{ topic }} - {{ broker }}", - "refId": "A" - } - ], - "title": "Outgoing Bytes Rate", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "The total size in bytes of volume space occupied by specific topic for each broker", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - 
"axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 68, - "interval": "$inter", - "links": [], - "maxDataPoints": 100, - "options": { - "legend": { - "calcs": [ - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Mean", - "sortDesc": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.5.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_log_Log_Value{name=\"Size\",topic=~\"$topic\",broker=~\"$broker\",namespace=\"$namespace\",cluster=\"$cluster\"}) by (topic, broker)", - "format": "time_series", - "legendFormat": "{{ topic }} - {{ broker }}", - "refId": "A" - } - ], - "title": "Partition Data Size", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Topics Information", - "type": "row" - } - ], - "refresh": "30s", - "schemaVersion": 39, - "tags": [ - "kafka", - "prometheus", - "kafka_topics" - ], - "templating": { - "list": [ - { - "hide": 0, - "includeAll": false, - "label": "Cloud", - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": 
false, - "type": "datasource" - }, - { - "current": { - "isNone": true, - "selected": false, - "text": "None", - "value": "" - }, - "datasource": { - "uid": "$datasource" - }, - "definition": "", - "hide": 0, - "includeAll": false, - "label": "Cluster", - "multi": false, - "name": "cluster", - "options": [], - "query": "label_values(kafka_controller_KafkaController_Value, cluster)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - }, - { - "datasource": { - "uid": "$datasource" - }, - "definition": "", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [], - "query": "label_values(kafka_controller_KafkaController_Value{cluster=\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": { - "uid": "$datasource" - }, - "definition": "", - "hide": 0, - "includeAll": true, - "label": "Broker", - "multi": true, - "name": "broker", - "options": [], - "query": "label_values(kafka_controller_KafkaController_Value{namespace=\"$namespace\", cluster=\"$cluster\"}, broker)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": { - "uid": "$datasource" - }, - "definition": "query_result(count by (topic)(count_over_time(kafka_log_Log_Value{namespace=\"$namespace\", topic=~\"$topics_regex\", cluster=\"$cluster\", name=\"Size\"}[1h])))", - "hide": 0, - "includeAll": true, - "label": "Topic", - "multi": true, - "name": "topic", - "options": [], - "query": { - "qryType": 3, - "query": "query_result(count by 
(topic)(count_over_time(kafka_log_Log_Value{namespace=\"$namespace\", topic=~\"$topics_regex\", cluster=\"$cluster\", name=\"Size\"}[1h])))", - "refId": "PrometheusVariableQueryEditor-VariableQuery" - }, - "refresh": 1, - "regex": "/topic=\"(?\u003Ctext\u003E[^\"]+)/g", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": true, - "text": ".*", - "value": ".*" - }, - "datasource": "$datasource", - "hide": 0, - "label": "Topics Regex", - "name": "topics_regex", - "options": [ - { - "selected": true, - "text": ".*", - "value": ".*" - } - ], - "query": ".*", - "skipUrlSync": false, - "type": "textbox" - }, - { - "auto": true, - "auto_count": 30, - "auto_min": "120s", - "current": { - "selected": false, - "text": "auto", - "value": "$__auto_interval_inter" - }, - "datasource": "$datasource", - "hide": 0, - "label": "Sampling", - "name": "inter", - "options": [ - { - "selected": true, - "text": "auto", - "value": "$__auto_interval_inter" - }, - { - "selected": false, - "text": "1m", - "value": "1m" - }, - { - "selected": false, - "text": "2m", - "value": "2m" - }, - { - "selected": false, - "text": "5m", - "value": "5m" - }, - { - "selected": false, - "text": "10m", - "value": "10m" - }, - { - "selected": false, - "text": "30m", - "value": "30m" - }, - { - "selected": false, - "text": "1h", - "value": "1h" - }, - { - "selected": false, - "text": "6h", - "value": "6h" - }, - { - "selected": false, - "text": "12h", - "value": "12h" - }, - { - "selected": false, - "text": "1d", - "value": "1d" - }, - { - "selected": false, - "text": "7d", - "value": "7d" - }, - { - "selected": false, - "text": "14d", - "value": "14d" - }, - { - "selected": false, - "text": "30d", - "value": "30d" - } - ], - "query": "1m,2m,5m,10m,30m,1h,6h,12h,1d,7d,14d,30d", - "refresh": 2, - "skipUrlSync": false, - "type": "interval" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - 
"timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kafka Topics", - "uid": "d16460083", - "version": 4, - "weekStart": "" - } ---- -# Source: kafka-service/templates/cr.yaml -apiVersion: netcracker.com/v7 -kind: KafkaService -metadata: - name: kafka - labels: - app.kubernetes.io/version: '' - app.kubernetes.io/part-of: 'kafka-services' - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: 'backend' - app.kubernetes.io/instance: kafka-montemplate - app.kubernetes.io/processed-by-operator: kafka-service-operator - annotations: - netcracker.com/reconcile-trigger: "D94QmztZ0B" -spec: - global: - waitForPodsReady: true - podReadinessTimeout: 600 - kraft: - enabled: false - defaultLabels: - app.kubernetes.io/version: '' - app.kubernetes.io/part-of: 'kafka-services' - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: 'backend' - app.kubernetes.io/managed-by: operator - app.kubernetes.io/managed-by-operator: kafka-service-operator - kafkaSaslMechanism: SCRAM-SHA-512 - kafkaSsl: - enabled: false - secretName: "" - monitoring: - dockerImage: ghcr.io/netcracker/qubership-kafka-monitoring:main - minVersion: 2.0.0 - maxVersion: 3.x.x - dataCollectionInterval: 10s - kafkaExecPluginTimeout: 10s - kafkaTotalBrokerCount: 3 - secretName: kafka-monitoring-secret - securityContext: - runAsNonRoot: true - seccompProfile: - type: "RuntimeDefault" - resources: - requests: - memory: 128Mi - cpu: 50m - limits: - memory: 256Mi - cpu: 200m - monitoringType: prometheus - akhq: - dockerImage: ghcr.io/netcracker/qubership-docker-akhq:main - bootstrapServers: kafka:9092 - kafkaPollTimeout: 10000 - enableAccessLog: false - securityContext: - runAsNonRoot: true - seccompProfile: - type: "RuntimeDefault" - ldap: - enabled: false - heapSize: 300 - 
resources: - requests: - memory: 600Mi - cpu: 50m - limits: - memory: 1200Mi - cpu: 400m - schemaRegistryType: confluent ---- -# Source: kafka-service/templates/prometheus_rules.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - labels: - app.kubernetes.io/version: '' - app.kubernetes.io/part-of: 'kafka-services' - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: monitoring - prometheus: Kafka-rules - role: alert-rules - name: prometheus-kafka-service-rules -spec: - groups: - - name: default-kafka-montemplate - rules: - - alert: KafkaIsDegradedAlert - annotations: - description: 'Kafka is Degraded' - summary: Some of Kafka Service pods are down - expr: kafka_cluster_status{namespace="default",container="kafka-monitoring"} == 6 - for: 3m - labels: - severity: warning - namespace: default - service: kafka-montemplate - - alert: KafkaMetricsAreAbsent - annotations: - description: 'Kafka metrics are absent on default.' - summary: Kafka metrics are absent - expr: absent(kafka_cluster_status{namespace="default"}) == 1 - for: 3m - labels: - severity: warning - namespace: default - service: kafka-montemplate - - alert: KafkaIsDownAlert - annotations: - description: 'Kafka is Down' - summary: All of Kafka Service pods are down - expr: kafka_cluster_status{namespace="default",container="kafka-monitoring"} == 10 - for: 3m - labels: - severity: critical - namespace: default - service: kafka-montemplate - - alert: KafkaCPUUsageAlert - annotations: - description: 'Kafka CPU usage is higher than 95 percents' - summary: Some of Kafka Service pods load CPU higher then 95 percents - expr: max(rate(container_cpu_usage_seconds_total{namespace="default",pod=~"kafka-[0-9].*",container="kafka"}[5m])) / max(kube_pod_container_resource_limits_cpu_cores{exported_namespace="default",exported_pod=~"kafka-[0-9].*"}) > 0.95 - for: 3m - labels: - severity: warning - namespace: default - service: kafka-montemplate - - alert: KafkaMemoryUsageAlert - 
annotations: - description: 'Kafka memory usage is higher than 95 percents' - summary: Some of Kafka Service pods use memory higher then 95 percents - expr: max(container_memory_working_set_bytes{namespace="default",pod=~"kafka-[0-9].*",container="kafka"}) / max(kube_pod_container_resource_limits_memory_bytes{exported_namespace="default",exported_pod=~"kafka-[0-9].*"}) > 0.95 - for: 3m - labels: - severity: warning - namespace: default - service: kafka-montemplate - - alert: KafkaHeapMemoryUsageAlert - annotations: - description: 'Kafka heap memory usage is higher than 95 percents' - summary: Some of Kafka Service pods use heap memory higher then 95 percents - expr: max(java_Memory_HeapMemoryUsage_used{namespace="default",broker=~"kafka-[0-9].*"}) / max(java_Memory_HeapMemoryUsage_max{namespace="default", broker=~"kafka-[0-9].*"}) > 0.95 - for: 3m - labels: - severity: warning - namespace: default - service: kafka-montemplate - - alert: KafkaGCCountAlert - annotations: - description: 'Some of Kafka Service pods have Garbage collections count rate higher than 10' - summary: Some of Kafka Service pods have Garbage collections count rate higher than 10 - expr: max(rate(java_GarbageCollector_CollectionCount_total{namespace="default", broker=~"kafka-[0-9].*"}[5m])) > 10 - for: 3m - labels: - severity: warning - namespace: default - service: kafka-montemplate - - alert: KafkaLagAlert - annotations: - description: 'Some of Kafka Service pods have partition lag higher than 1000' - summary: Some of Kafka Service pods have partition lag higher than 1000 - expr: max(kafka_consumergroup_group_lag{namespace="default"}) > 1000 - for: 3m - labels: - severity: warning - namespace: default - service: kafka-montemplate - - alert: KafkaPartitionCountAlert - annotations: - description: 'Kafka Partition count for {{ $labels.broker }} broker is higher than 4000' - summary: Some of Kafka Partition count is higher than 4000 - expr: kafka_server_ReplicaManager_Value{name="PartitionCount", 
namespace="default", broker=~"kafka-[0-9].*"} > 4000 - for: 3m - labels: - severity: warning - namespace: default - service: kafka-montemplate - - alert: KafkaBrokerSkewAlert - annotations: - description: 'Kafka Broker Skew for {{ $labels.broker }} broker is higher than 50%' - summary: Some of Kafka Broker Skew is higher than 50% - expr: (kafka_broker_skew{namespace="default", container="kafka-monitoring", broker=~"kafka-[0-9].*"} > 50) and on(broker, namespace) (kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="default", broker=~"kafka-[0-9].*"} > 3) - for: 3m - labels: - severity: warning - namespace: default - service: kafka-montemplate - - alert: KafkaBrokerLeaderSkewAlert - annotations: - description: 'Kafka Broker Leader Skew for {{ $labels.broker }} broker is higher than 50%' - summary: Some of Kafka Broker Leader Skew is higher than 50% - expr: (kafka_broker_leader_skew{namespace="default", container="kafka-monitoring", broker=~"kafka-[0-9].*"} > 50) and on(broker, namespace) (kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="default", broker=~"kafka-[0-9].*"} > 3) - for: 3m - labels: - severity: warning - namespace: default - service: kafka-montemplate - - alert: SupplementaryServicesCompatibilityAlert - annotations: - description: 'Kafka supplementary services in namespace {{ $labels.namespace }} is not compatible with Kafka version {{ $labels.application_version }}' - summary: 'Kafka supplementary services in namespace {{ $labels.namespace }} is not compatible with Kafka version {{ $labels.application_version }}, allowed range is {{ $labels.min_version }} - {{ $labels.max_version }}' - expr: supplementary_services_version_compatible{application="kafka", namespace="default"} != 1 - for: 3m - labels: - severity: warning - namespace: default - service: kafka-montemplate ---- -# Source: kafka-service/templates/tls_static_metrics.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - labels: - 
app.kubernetes.io/version: '' - app.kubernetes.io/part-of: 'kafka-services' - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: monitoring - prometheus: kafka-tls-static-metrics-rules - role: record-rules - name: kafka-tls-static-metrics-rules -spec: - groups: - - name: default-kafka-montemplate - rules: - - expr: 0 - labels: - namespace: "default" - application: "kafka-service" - service: "kafka" - - record: service:tls_status:info ---- -# Source: kafka-service/templates/service_monitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: kafka-service-monitor - labels: - app.kubernetes.io/version: '' - app.kubernetes.io/part-of: 'kafka-services' - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: kafka-service-monitor - app.kubernetes.io/component: monitoring -spec: - endpoints: - - interval: 60s - scrapeTimeout: 10s - port: prometheus-cli - scheme: http - jobLabel: k8s-app - namespaceSelector: - matchNames: - - default - selector: - matchLabels: - component: kafka-monitoring - name: kafka-monitoring ---- -# Source: kafka-service/templates/service_monitor_jmx_exporter.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: kafka-service-monitor-jmx-exporter - labels: - app.kubernetes.io/version: '' - app.kubernetes.io/part-of: 'kafka-services' - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: kafka-service-monitor-jmx-exporter - app.kubernetes.io/component: monitoring -spec: - endpoints: - - interval: 60s - scrapeTimeout: 10s - port: prometheus-http - scheme: http - basicAuth: - username: - name: kafka-secret - key: client-username - password: - name: kafka-secret - key: client-password - jobLabel: k8s-app - namespaceSelector: - matchNames: - - default - selector: - matchLabels: - component: kafka - clusterName: kafka ---- -# Source: kafka-service/templates/pre-deploy/ownerref-migrator-sa.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: 
kafka-services-ownerref-migrator - labels: - app.kubernetes.io/instance: kafka-montemplate - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "-200" - "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded ---- -# Source: kafka-service/templates/pre-deploy/ownerref-migrator-rbac.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: kafka-services-ownerref-migrator - labels: - app.kubernetes.io/instance: kafka-montemplate - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "-190" - "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded -rules: - - apiGroups: ["apps"] - resources: ["deployments","statefulsets"] - verbs: ["get","list","patch","update"] - - apiGroups: [""] - resources: ["configmaps","secrets","services","persistentvolumeclaims"] - verbs: ["get","list","patch","update"] ---- -# Source: kafka-service/templates/pre-deploy/ownerref-migrator-rbac.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: kafka-services-ownerref-migrator - labels: - app.kubernetes.io/instance: kafka-montemplate - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "-180" - "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded - "argocd.argoproj.io/hook": PreSync - "argocd.argoproj.io/hook-delete-policy": HookSucceeded -subjects: - - kind: ServiceAccount - name: kafka-services-ownerref-migrator - namespace: default -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: kafka-services-ownerref-migrator ---- -# Source: kafka-service/templates/pre-deploy/ownerref-migrator-job.yaml -apiVersion: batch/v1 -kind: Job -metadata: - name: kafka-services-ownerref-migrator - labels: - app.kubernetes.io/instance: kafka-montemplate - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "0" - "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded -spec: - template: - 
metadata: - labels: - app.kubernetes.io/instance: kafka-montemplate - spec: - serviceAccountName: kafka-services-ownerref-migrator - restartPolicy: OnFailure - securityContext: - runAsNonRoot: true - seccompProfile: { type: RuntimeDefault } - containers: - - name: migrator - image: ghcr.io/netcracker/qubership-docker-kubectl:main - imagePullPolicy: Always - command: ["/bin/sh","-c"] - args: - - | - set -euo pipefail - - # Ensure jq is present - command -v jq >/dev/null 2>&1 || { echo "[migrator] jq is required"; exit 1; } - - # Config - KUBECTL="kubectl" - NS="default" - RESOURCES="${RESOURCES:-deployments,statefulsets,configmaps,secrets,services,persistentvolumeclaims}" - SELECTOR="" - OLD_GROUP="qubership.org/" - - # Build optional selector flag - if [ -n "${SELECTOR}" ]; then SEL="-l ${SELECTOR}"; else SEL=""; fi - - IFS=','; for r in $RESOURCES; do - r="$(echo "$r" | xargs)"; [ -z "$r" ] && continue - - # Get items JSON; on RBAC/list error warn and continue - ITEMS_JSON="$($KUBECTL -n "$NS" get "$r" $SEL -o json 2>/dev/null || true)" - if [ -z "$ITEMS_JSON" ]; then - echo "[migrator][WARN] cannot list $r (RBAC or not found)"; continue - fi - - # Extract names having ownerRefs with apiVersion starting with OLD_GROUP - NAMES="$(printf "%s" "$ITEMS_JSON" \ - | jq -r --arg grp "$OLD_GROUP" '.items[] - | select([(.metadata.ownerReferences // [])[]? 
- | .apiVersion|tostring - | startswith($grp)] | any) - | .metadata.name')" - - [ -z "$NAMES" ] && continue - # Iterate names without here-strings (portable for /bin/sh) - printf "%s\n" "$NAMES" | while IFS= read -r name; do - [ -z "$name" ] && continue - - OBJ_JSON="$($KUBECTL -n "$NS" get "$r" "$name" -o json 2>/dev/null || true)" - if [ -z "$OBJ_JSON" ]; then - echo "[migrator][WARN] cannot get $r/$name"; continue - fi - - # Build merge-patch that removes matching ownerRefs - PATCH="$(printf "%s" "$OBJ_JSON" | jq -c --arg grp "$OLD_GROUP" ' - {"metadata":{"ownerReferences":[ - (.metadata.ownerReferences // [])[] - | select((.apiVersion|tostring)|startswith($grp)|not) - ]}}')" - - if $KUBECTL -n "$NS" patch "$r" "$name" --type=merge -p "$PATCH" >/dev/null 2>&1; then - echo "[migrator] $r/$name patched" - else - echo "[migrator][WARN] patch failed for $r/$name" - fi - done - done - - echo "[migrator] done" - resources: - limits: - cpu: 100m - memory: 256Mi - requests: - cpu: 20m - memory: 64Mi - securityContext: - allowPrivilegeEscalation: false - readOnlyRootFilesystem: true - capabilities: { drop: ["ALL"] } From 906e2487f69972fc709bfacb566efe3d3930b01d Mon Sep 17 00:00:00 2001 From: FedorProshin Date: Wed, 19 Nov 2025 22:41:49 +0300 Subject: [PATCH 3/4] feat: added backward compatibility in values yaml for alerts, added alerts test using vmalert tool - added description to docs --- monitoring/README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/monitoring/README.md b/monitoring/README.md index 5dd58c4b..cd4618f6 100644 --- a/monitoring/README.md +++ b/monitoring/README.md @@ -88,3 +88,14 @@ Triggers for tracking following problems are included in template: If you have Kafka which is deployed in DR mode you need to create two hosts: for left and for right side and to specify the side as value (`left`, `right`) for the macros `{$DR_SIDE}`. If you have Kafka without DR just leave this macros empty. 
+ +### Deep alerts tuning using subchart + +If you want to make deep customizations to alerts (add new ones, override any alert fields, disable alerts, etc.), you can use the v2 alerts functionality. +To use it you need to: + +1) Set the `alertsPackVersion: v2` value in the `monitoring` section of the values yaml for kafka-services. +2) Use the subchart's values yaml (/operator/charts/helm/kafka-service/charts/prometheusrules) to set overrides for alerts. Overrides are merged with the default alerts defined in the subchart's `_helpers.tpl` and take priority over them. + +If you set any other value for `alertsPackVersion` except "v2", or do not set it at all, the installation uses the old alerts flavour. +Alert groups in the subchart are supported in the same manner as described above. \ No newline at end of file From 452431bd57dce496fec205527ca504906f46006a Mon Sep 17 00:00:00 2001 From: FedorProshin Date: Tue, 25 Nov 2025 17:49:05 +0500 Subject: [PATCH 4/4] feat: added alert customizations in subchart, backward compatibility in values yaml for alerts, fixed integration unit test for alerts --- .github/workflows/alerts-test.yml | 2 +- operator/charts/helm/kafka-service/Chart.yaml | 2 +- .../charts/prometheusrules/Chart.yaml | 2 +- .../prometheusrules/templates/_helpers.tpl | 65 +++++++++---------- ...etheusrules.yaml => prometheus_rules.yaml} | 31 +++++---- .../templates/prometheus_rules.yaml | 10 +-- .../charts/helm/kafka-service/values.yaml | 2 +- operator/tests/alerts-tests/test.yaml | 8 +-- 8 files changed, 64 insertions(+), 58 deletions(-) rename operator/charts/helm/kafka-service/charts/prometheusrules/templates/{prometheusrules.yaml => prometheus_rules.yaml} (66%) diff --git a/.github/workflows/alerts-test.yml b/.github/workflows/alerts-test.yml index 7b0227ef..0ee5652a 100644 --- a/.github/workflows/alerts-test.yml +++ b/.github/workflows/alerts-test.yml @@ -34,7 +34,7 @@ jobs: run: | helm template kafka-montemplate ./operator/charts/helm/kafka-service/ > ./operator/tests/alerts-tests/rules.yaml sed -n 
'/prometheus_rules.yaml/,/---/p' -i ./operator/tests/alerts-tests/rules.yaml - sed '1,13d' -i ./operator/tests/alerts-tests/rules.yaml + sed '0,/spec:/d' -i ./operator/tests/alerts-tests/rules.yaml - name: Check that all necessary tests exists diff --git a/operator/charts/helm/kafka-service/Chart.yaml b/operator/charts/helm/kafka-service/Chart.yaml index d4e24990..f474a4ab 100644 --- a/operator/charts/helm/kafka-service/Chart.yaml +++ b/operator/charts/helm/kafka-service/Chart.yaml @@ -22,7 +22,7 @@ appVersion: 1.0.0 dependencies: # Prometheus alert rules -- name: prometheusrules +- name: monitoring condition: monitoring.install version: ~0 repository: "file://charts/prometheusrules" diff --git a/operator/charts/helm/kafka-service/charts/prometheusrules/Chart.yaml b/operator/charts/helm/kafka-service/charts/prometheusrules/Chart.yaml index 004fc6f2..92ce7a9f 100644 --- a/operator/charts/helm/kafka-service/charts/prometheusrules/Chart.yaml +++ b/operator/charts/helm/kafka-service/charts/prometheusrules/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v2 -name: prometheusrules +name: monitoring description: A Helm chart for Kubernetes # A chart can be either an 'application' or a 'library' chart. 
diff --git a/operator/charts/helm/kafka-service/charts/prometheusrules/templates/_helpers.tpl b/operator/charts/helm/kafka-service/charts/prometheusrules/templates/_helpers.tpl index 89a8d83b..d20e878e 100644 --- a/operator/charts/helm/kafka-service/charts/prometheusrules/templates/_helpers.tpl +++ b/operator/charts/helm/kafka-service/charts/prometheusrules/templates/_helpers.tpl @@ -1,8 +1,7 @@ {{- define "defaultAlerts" -}} -{{- if and (eq .Values.alertsPackVersion "v2") (.Values.install) }} - - name: {{ .Release.Namespace }}-{{ .Release.Name }} + {{ .Release.Namespace }}-{{ .Release.Name }}: rules: - - alert: KafkaIsDegradedAlert + KafkaIsDegradedAlert: annotations: description: 'Kafka is Degraded' summary: Some of Kafka Service pods are down @@ -12,7 +11,7 @@ severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} - - alert: KafkaMetricsAreAbsent + KafkaMetricsAreAbsent: annotations: description: 'Kafka metrics are absent on {{ .Release.Namespace }}.' summary: Kafka metrics are absent @@ -22,7 +21,7 @@ severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} - - alert: KafkaIsDownAlert + KafkaIsDownAlert: annotations: description: 'Kafka is Down' summary: All of Kafka Service pods are down @@ -32,7 +31,7 @@ severity: critical namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} - - alert: KafkaCPUUsageAlert + KafkaCPUUsageAlert: annotations: description: 'Kafka CPU usage is higher than 95 percents' summary: Some of Kafka Service pods load CPU higher then 95 percents @@ -42,7 +41,7 @@ severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} - - alert: KafkaMemoryUsageAlert + KafkaMemoryUsageAlert: annotations: description: 'Kafka memory usage is higher than 95 percents' summary: Some of Kafka Service pods use memory higher then 95 percents @@ -52,7 +51,7 @@ severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} - - alert: 
KafkaHeapMemoryUsageAlert + KafkaHeapMemoryUsageAlert: annotations: description: 'Kafka heap memory usage is higher than 95 percents' summary: Some of Kafka Service pods use heap memory higher then 95 percents @@ -62,63 +61,63 @@ severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} - - alert: KafkaGCCountAlert + KafkaGCCountAlert: annotations: - description: 'Some of Kafka Service pods have Garbage collections count rate higher than {{ .Values.monitoring.thresholds.gcCountAlert }}' - summary: Some of Kafka Service pods have Garbage collections count rate higher than {{ .Values.monitoring.thresholds.gcCountAlert }} - expr: max(rate(java_GarbageCollector_CollectionCount_total{namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . }}-[0-9].*"}[5m])) > {{ .Values.monitoring.thresholds.gcCountAlert }} + description: 'Some of Kafka Service pods have Garbage collections count rate higher than {{ .Values.thresholds.gcCountAlert }}' + summary: Some of Kafka Service pods have Garbage collections count rate higher than {{ .Values.thresholds.gcCountAlert }} + expr: max(rate(java_GarbageCollector_CollectionCount_total{namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . 
}}-[0-9].*"}[5m])) > {{ .Values.thresholds.gcCountAlert }} for: 3m labels: severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} - - alert: KafkaLagAlert + KafkaLagAlert: annotations: - description: 'Some of Kafka Service pods have partition lag higher than {{ .Values.monitoring.thresholds.lagAlert }}' - summary: Some of Kafka Service pods have partition lag higher than {{ .Values.monitoring.thresholds.lagAlert }} - expr: max(kafka_consumergroup_group_lag{namespace="{{ .Release.Namespace }}"}) > {{ .Values.monitoring.thresholds.lagAlert }} + description: 'Some of Kafka Service pods have partition lag higher than {{ .Values.thresholds.lagAlert }}' + summary: Some of Kafka Service pods have partition lag higher than {{ .Values.thresholds.lagAlert }} + expr: max(kafka_consumergroup_group_lag{namespace="{{ .Release.Namespace }}"}) > {{ .Values.thresholds.lagAlert }} for: 3m labels: severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} - {{- if .Values.monitoring.thresholds.partitionCountAlert }} - - alert: KafkaPartitionCountAlert + {{- if .Values.thresholds.partitionCountAlert }} + KafkaPartitionCountAlert: annotations: - description: 'Kafka Partition count for {{`{{ $labels.broker }}`}} broker is higher than {{ .Values.monitoring.thresholds.partitionCountAlert }}' - summary: Some of Kafka Partition count is higher than {{ .Values.monitoring.thresholds.partitionCountAlert }} - expr: kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . 
}}-[0-9].*"} > {{ .Values.monitoring.thresholds.partitionCountAlert }} + description: 'Kafka Partition count for {{`{{ $labels.broker }}`}} broker is higher than {{ .Values.thresholds.partitionCountAlert }}' + summary: Some of Kafka Partition count is higher than {{ .Values.thresholds.partitionCountAlert }} + expr: kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ .Values.thresholds.partitionCountAlert }} for: 3m labels: severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} {{- end }} - {{- if .Values.monitoring.thresholds.brokerSkewAlert }} - - alert: KafkaBrokerSkewAlert + {{- if .Values.thresholds.brokerSkewAlert }} + KafkaBrokerSkewAlert: annotations: - description: 'Kafka Broker Skew for {{`{{ $labels.broker }}`}} broker is higher than {{ .Values.monitoring.thresholds.brokerSkewAlert }}%' - summary: Some of Kafka Broker Skew is higher than {{ .Values.monitoring.thresholds.brokerSkewAlert }}% - expr: (kafka_broker_skew{namespace="{{ .Release.Namespace }}", container="{{ template "kafka.name" . }}-monitoring", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ .Values.monitoring.thresholds.brokerSkewAlert }}) and on(broker, namespace) (kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ coalesce .Values.monitoring.thresholds.brokerSkewAlertPartitionCount (include "kafka.replicas" . ) }}) + description: 'Kafka Broker Skew for {{`{{ $labels.broker }}`}} broker is higher than {{ .Values.thresholds.brokerSkewAlert }} percent' + summary: Some of Kafka Broker Skew is higher than {{ .Values.thresholds.brokerSkewAlert }} percent + expr: (kafka_broker_skew{namespace="{{ .Release.Namespace }}", container="{{ template "kafka.name" . }}-monitoring", broker=~"{{ template "kafka.name" . 
}}-[0-9].*"} > {{ .Values.thresholds.brokerSkewAlert }}) and on(broker, namespace) (kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > 3 ) for: 3m labels: severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} {{- end }} - {{- if .Values.monitoring.thresholds.brokerLeaderSkewAlert }} - - alert: KafkaBrokerLeaderSkewAlert + {{- if .Values.thresholds.brokerLeaderSkewAlert }} + KafkaBrokerLeaderSkewAlert: annotations: - description: 'Kafka Broker Leader Skew for {{`{{ $labels.broker }}`}} broker is higher than {{ .Values.monitoring.thresholds.brokerLeaderSkewAlert }}%' - summary: Some of Kafka Broker Leader Skew is higher than {{ .Values.monitoring.thresholds.brokerLeaderSkewAlert }}% - expr: (kafka_broker_leader_skew{namespace="{{ .Release.Namespace }}", container="{{ template "kafka.name" . }}-monitoring", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ .Values.monitoring.thresholds.brokerLeaderSkewAlert }}) and on(broker, namespace) (kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ coalesce .Values.monitoring.thresholds.brokerLeaderSkewAlertPartitionCount (include "kafka.replicas" . ) }}) + description: 'Kafka Broker Leader Skew for {{`{{ $labels.broker }}`}} broker is higher than {{ .Values.thresholds.brokerLeaderSkewAlert }} percent' + summary: Some of Kafka Broker Leader Skew is higher than {{ .Values.thresholds.brokerLeaderSkewAlert }} percent + expr: (kafka_broker_leader_skew{namespace="{{ .Release.Namespace }}", container="{{ template "kafka.name" . }}-monitoring", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ .Values.thresholds.brokerLeaderSkewAlert }}) and on(broker, namespace) (kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . 
}}-[0-9].*"} > 3 ) for: 3m labels: severity: warning namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} {{- end }} - - alert: SupplementaryServicesCompatibilityAlert + SupplementaryServicesCompatibilityAlert: annotations: description: 'Kafka supplementary services in namespace {{`{{ $labels.namespace }}`}} is not compatible with Kafka version {{`{{ $labels.application_version }}`}}' summary: 'Kafka supplementary services in namespace {{`{{ $labels.namespace }}`}} is not compatible with Kafka version {{`{{ $labels.application_version }}`}}, allowed range is {{`{{ $labels.min_version }}`}} - {{`{{ $labels.max_version }}`}}' @@ -129,5 +128,5 @@ namespace: {{ .Release.Namespace }} service: {{ .Release.Name }} {{- end }} -{{- end }} + diff --git a/operator/charts/helm/kafka-service/charts/prometheusrules/templates/prometheusrules.yaml b/operator/charts/helm/kafka-service/charts/prometheusrules/templates/prometheus_rules.yaml similarity index 66% rename from operator/charts/helm/kafka-service/charts/prometheusrules/templates/prometheusrules.yaml rename to operator/charts/helm/kafka-service/charts/prometheusrules/templates/prometheus_rules.yaml index b04920bf..ff61cee4 100644 --- a/operator/charts/helm/kafka-service/charts/prometheusrules/templates/prometheusrules.yaml +++ b/operator/charts/helm/kafka-service/charts/prometheusrules/templates/prometheus_rules.yaml @@ -1,4 +1,4 @@ -{{- if and (eq .Values.alertsPackVersion "v2") (.Values.prometheusMonitoring) }} +{{- if and ( .Values.install) (eq .Values.alertsPackVersion "v2") }} apiVersion: operator.victoriametrics.com/v1beta1 kind: VMRule metadata: @@ -11,31 +11,38 @@ spec: {{- $finalConfig := merge $overrideConfig $defaultConfig -}} {{- $alertGroups := .Values.ruleGroups -}} - {{- range $defaultGroupName, $defaultGroup := $finalConfig }} +{{- $found := true }} +{{- if $alertGroups }} - {{- $found := false }} + {{- /* '=' assigns to the range-level $found; ':=' would only shadow it inside this block */ -}}{{- $found = false }} {{- range $alertGroups }} {{- if eq $defaultGroupName . 
}} {{- $found = true }} {{- end }} {{- end }} +{{- else }} + {{- $found = true }} +{{- end }} + {{- if $found }} - name: {{ $defaultGroupName }} + {{- if $defaultGroup.labels }} labels: -{{- range $defaultLabelName, $defaultLabelValue := $defaultGroup.labels }} - {{ $defaultLabelName }}: {{ $defaultLabelValue }} -{{- end }} - {{- if $defaultGroup.interval }} - interval: {{ $defaultGroup.interval }} + {{- range $defaultLabelName, $defaultLabelValue := $defaultGroup.labels }} + {{ $defaultLabelName }}: {{ $defaultLabelValue }} + {{- end }} + {{- end }} + {{- if $defaultGroup.interval }} + interval: {{ $defaultGroup.interval }} {{- end }} - {{- if $defaultGroup.concurrency }} - concurrency: {{ $defaultGroup.concurrency }} + {{- if $defaultGroup.concurrency }} + concurrency: {{ $defaultGroup.concurrency }} {{- end }} rules: {{- range $defaultRuleName, $defaultRule := $defaultGroup.rules }} - alert: {{ $defaultRuleName }} expr: {{ $defaultRule.expr }} - {{- if $defaultRule.for }} + {{- if $defaultRule.for }} for: {{ $defaultRule.for }} {{- end }} labels: @@ -49,4 +56,4 @@ spec: {{- end }} {{- end }} {{- end }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/operator/charts/helm/kafka-service/templates/prometheus_rules.yaml b/operator/charts/helm/kafka-service/templates/prometheus_rules.yaml index e42c434c..2e49677f 100644 --- a/operator/charts/helm/kafka-service/templates/prometheus_rules.yaml +++ b/operator/charts/helm/kafka-service/templates/prometheus_rules.yaml @@ -1,4 +1,4 @@ -{{- if (and (eq (include "monitoring.install" .) "true") (ne .Values.alertsPackVersion "v2") (ne (include "monitoring.type" .) 
"influxdb") }} apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: @@ -107,8 +107,8 @@ spec: {{- if .Values.monitoring.thresholds.brokerSkewAlert }} - alert: KafkaBrokerSkewAlert annotations: - description: 'Kafka Broker Skew for {{`{{ $labels.broker }}`}} broker is higher than {{ .Values.monitoring.thresholds.brokerSkewAlert }}%' - summary: Some of Kafka Broker Skew is higher than {{ .Values.monitoring.thresholds.brokerSkewAlert }}% + description: 'Kafka Broker Skew for {{`{{ $labels.broker }}`}} broker is higher than {{ .Values.monitoring.thresholds.brokerSkewAlert }} percent' + summary: Some of Kafka Broker Skew is higher than {{ .Values.monitoring.thresholds.brokerSkewAlert }} percent expr: (kafka_broker_skew{namespace="{{ .Release.Namespace }}", container="{{ template "kafka.name" . }}-monitoring", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ .Values.monitoring.thresholds.brokerSkewAlert }}) and on(broker, namespace) (kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ coalesce .Values.monitoring.thresholds.brokerSkewAlertPartitionCount (include "kafka.replicas" . 
) }}) for: 3m labels: @@ -119,8 +119,8 @@ spec: {{- if .Values.monitoring.thresholds.brokerLeaderSkewAlert }} - alert: KafkaBrokerLeaderSkewAlert annotations: - description: 'Kafka Broker Leader Skew for {{`{{ $labels.broker }}`}} broker is higher than {{ .Values.monitoring.thresholds.brokerLeaderSkewAlert }}%' - summary: Some of Kafka Broker Leader Skew is higher than {{ .Values.monitoring.thresholds.brokerLeaderSkewAlert }}% + description: 'Kafka Broker Leader Skew for {{`{{ $labels.broker }}`}} broker is higher than {{ .Values.monitoring.thresholds.brokerLeaderSkewAlert }} percent' + summary: Some of Kafka Broker Leader Skew is higher than {{ .Values.monitoring.thresholds.brokerLeaderSkewAlert }} percent expr: (kafka_broker_leader_skew{namespace="{{ .Release.Namespace }}", container="{{ template "kafka.name" . }}-monitoring", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ .Values.monitoring.thresholds.brokerLeaderSkewAlert }}) and on(broker, namespace) (kafka_server_ReplicaManager_Value{name="PartitionCount", namespace="{{ .Release.Namespace }}", broker=~"{{ template "kafka.name" . }}-[0-9].*"} > {{ coalesce .Values.monitoring.thresholds.brokerLeaderSkewAlertPartitionCount (include "kafka.replicas" . 
) }}) for: 3m labels: diff --git a/operator/charts/helm/kafka-service/values.yaml b/operator/charts/helm/kafka-service/values.yaml index 4ca740d9..a1074037 100644 --- a/operator/charts/helm/kafka-service/values.yaml +++ b/operator/charts/helm/kafka-service/values.yaml @@ -196,7 +196,7 @@ kafka: monitoring: install: true - alertsPackVersion: "v1" + alertsPackVersion: v1 dockerImage: ghcr.io/netcracker/qubership-kafka-monitoring:main serviceMonitorEnabled: true # affinity: { diff --git a/operator/tests/alerts-tests/test.yaml b/operator/tests/alerts-tests/test.yaml index 8d697449..c23b42ce 100644 --- a/operator/tests/alerts-tests/test.yaml +++ b/operator/tests/alerts-tests/test.yaml @@ -279,8 +279,8 @@ tests: broker: kafka-0 container: kafka-monitoring exp_annotations: - description: Kafka Broker Skew for kafka-0 broker is higher than 50% - summary: Some of Kafka Broker Skew is higher than 50% + description: Kafka Broker Skew for kafka-0 broker is higher than 50 percent + summary: Some of Kafka Broker Skew is higher than 50 percent - interval: 1m input_series: @@ -312,8 +312,8 @@ tests: broker: kafka-0 container: kafka-monitoring exp_annotations: - description: Kafka Broker Leader Skew for kafka-0 broker is higher than 50% - summary: Some of Kafka Broker Leader Skew is higher than 50% + description: Kafka Broker Leader Skew for kafka-0 broker is higher than 50 percent + summary: Some of Kafka Broker Leader Skew is higher than 50 percent - interval: 1m input_series: