From 53ee50c9bdae2c9d730ee0c385a628fa97363996 Mon Sep 17 00:00:00 2001 From: Tim Simmons Date: Fri, 16 May 2025 15:45:40 -0500 Subject: [PATCH 1/7] update Temporal Cloud dashboard - Add Replication Lag - Add Frontend Resource Exhausted Errors - Rearrange a couple rows - Update various version numbers and variables as result of newer Grafana --- cloud/temporal_cloud.json | 1229 +++++++++++++++++++++++-------------- 1 file changed, 775 insertions(+), 454 deletions(-) diff --git a/cloud/temporal_cloud.json b/cloud/temporal_cloud.json index 238265d..48a3ed6 100644 --- a/cloud/temporal_cloud.json +++ b/cloud/temporal_cloud.json @@ -1,4 +1,41 @@ { + "__inputs": [ + { + "name": "DS_EXTERNAL_METRICS", + "label": "external metrics", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "11.4.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], "annotations": { "list": [ { @@ -26,32 +63,18 @@ "graphTooltip": 0, "id": null, "links": [], - "liveNow": false, "panels": [ { "collapsed": false, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 17, + "id": 29, "panels": [], - "targets": [ - { - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "refId": "A" - } - ], - "title": "State Transitions", + "title": "Actions", "type": "row" }, { @@ -59,18 +82,19 @@ "type": "prometheus", "uid": "${datasource}" }, - "description": "", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", 
"axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -79,6 +103,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -118,7 +143,7 @@ "x": 0, "y": 1 }, - "id": 2, + "id": 31, "options": { "legend": { "calcs": [ @@ -137,20 +162,24 @@ "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, - "exemplar": true, - "expr": "sum(rate(temporal_cloud_v0_state_transition_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "editorMode": "code", + "exemplar": false, + "expr": "sum(rate(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "instant": false, "interval": "", "legendFormat": "{{temporal_namespace}}", + "range": true, "refId": "A" } ], - "title": "State Transitions", + "title": "Actions", "type": "timeseries" }, { @@ -188,12 +217,13 @@ "x": 12, "y": 1 }, - "id": 10, + "id": 32, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -201,23 +231,25 @@ "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.1.4", + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "exemplar": true, - "expr": "sum(increase(temporal_cloud_v0_state_transition_count{temporal_namespace=~\"$temporal_namespace\"}[30d])) by (temporal_namespace)", + "expr": "sum(increase(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\"}[30d])) by (temporal_namespace)", "interval": 
"", "legendFormat": "{{temporal_namespace}}", "refId": "A" } ], - "title": "30 Day State Transitions", + "title": "30 Days Actions", "type": "stat" }, { @@ -255,12 +287,13 @@ "x": 16, "y": 1 }, - "id": 14, + "id": 33, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -268,23 +301,25 @@ "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.1.4", + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "exemplar": true, - "expr": "sum(increase(temporal_cloud_v0_state_transition_count{temporal_namespace=~\"$temporal_namespace\"}[7d])) by (temporal_namespace)", + "expr": "sum(increase(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\"}[7d])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", "refId": "A" } ], - "title": "7 Day State Transitions", + "title": "7 Days Actions", "type": "stat" }, { @@ -322,12 +357,13 @@ "x": 20, "y": 1 }, - "id": 15, + "id": 34, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" @@ -335,23 +371,25 @@ "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.1.4", + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "exemplar": true, - "expr": "sum(increase(temporal_cloud_v0_state_transition_count{temporal_namespace=~\"$temporal_namespace\"}[1d])) by (temporal_namespace)", + "expr": 
"sum(increase(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\"}[1d])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", "refId": "A" } ], - "title": "1 Day State Transitions", + "title": "1 Day Actions", "type": "stat" }, { @@ -362,9 +400,9 @@ "x": 0, "y": 10 }, - "id": 29, + "id": 21, "panels": [], - "title": "Actions", + "title": "Workflows Overview", "type": "row" }, { @@ -372,17 +410,20 @@ "type": "prometheus", "uid": "${datasource}" }, + "description": "", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -391,6 +432,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -430,7 +472,7 @@ "x": 0, "y": 11 }, - "id": 31, + "id": 4, "options": { "legend": { "calcs": [ @@ -449,23 +491,21 @@ "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", - "instant": false, + "exemplar": true, + "expr": "sum(rate(temporal_cloud_v0_workflow_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", - "range": true, "refId": "A" } ], - "title": "Actions", + "title": "Workflow Success", "type": "timeseries" }, { @@ -477,7 +517,40 @@ "fieldConfig": { "defaults": { "color": { - "mode": "continuous-GrYlRd" + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { @@ -492,48 +565,53 @@ "value": 80 } ] - }, - "unit": "short" + } }, "overrides": [] }, "gridPos": { "h": 9, - "w": 4, + "w": 12, "x": 12, "y": 11 }, - "id": 32, + "id": 41, "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "lastNotNull", + "min", + "max" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true }, - "textMode": "auto" + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "pluginVersion": "9.1.4", + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, + "editorMode": "code", "exemplar": true, - "expr": "sum(increase(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\"}[30d])) by (temporal_namespace)", + "expr": "sum(rate(temporal_cloud_v0_workflow_timeout_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", + "range": true, "refId": "A" } ], - "title": "30 Days Actions", - "type": "stat" + "title": "Workflow Timeouts", + "type": "timeseries" }, { "datasource": { @@ -544,7 +622,40 @@ "fieldConfig": { "defaults": { 
"color": { - "mode": "continuous-GrYlRd" + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { @@ -559,48 +670,51 @@ "value": 80 } ] - }, - "unit": "short" + } }, "overrides": [] }, "gridPos": { "h": 9, - "w": 4, - "x": 16, - "y": 11 + "w": 12, + "x": 0, + "y": 20 }, - "id": 33, + "id": 12, "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "lastNotNull", + "min", + "max" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true }, - "textMode": "auto" + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "pluginVersion": "9.1.4", + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, - "exemplar": true, - "expr": "sum(increase(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\"}[7d])) by (temporal_namespace)", + "exemplar": false, + "expr": "sum(rate(temporal_cloud_v0_workflow_failed_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", "refId": "A" } ], - "title": "7 Days Actions", - "type": "stat" + "title": 
"Workflow Failed", + "type": "timeseries" }, { "datasource": { @@ -611,7 +725,40 @@ "fieldConfig": { "defaults": { "color": { - "mode": "continuous-GrYlRd" + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { @@ -626,73 +773,63 @@ "value": 80 } ] - }, - "unit": "short" + } }, "overrides": [] }, "gridPos": { "h": 9, - "w": 4, - "x": 20, - "y": 11 + "w": 12, + "x": 12, + "y": 20 }, - "id": 34, + "id": 13, "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "lastNotNull", + "min", + "max" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true }, - "textMode": "auto" + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "pluginVersion": "9.1.4", + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, - "exemplar": true, - "expr": "sum(increase(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\"}[1d])) by (temporal_namespace)", + "exemplar": false, + "expr": "sum(rate(temporal_cloud_v0_workflow_continued_as_new_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", 
"interval": "", "legendFormat": "{{temporal_namespace}}", "refId": "A" } ], - "title": "1 Day Actions", - "type": "stat" + "title": "Workflow Continued as New", + "type": "timeseries" }, { "collapsed": false, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 20 + "y": 29 }, - "id": 21, + "id": 45, "panels": [], - "targets": [ - { - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "refId": "A" - } - ], - "title": "Workflows Overview", + "title": "Multi-region Namespaces", "type": "row" }, { @@ -700,18 +837,19 @@ "type": "prometheus", "uid": "${datasource}" }, - "description": "", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -720,6 +858,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -749,7 +888,8 @@ "value": 80 } ] - } + }, + "unit": "s" }, "overrides": [] }, @@ -757,60 +897,72 @@ "h": 9, "w": 12, "x": 0, - "y": 21 + "y": 30 }, - "id": 4, + "id": 47, "options": { "legend": { "calcs": [ "lastNotNull", - "min", + "mean", "max" ], "displayMode": "table", "placement": "bottom", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true + "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, - "exemplar": true, - "expr": "sum(rate(temporal_cloud_v0_workflow_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", - "interval": "", - "legendFormat": "{{temporal_namespace}}", + "editorMode": "code", + "expr": 
"sum(rate(temporal_cloud_v0_replication_lag_sum{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)\n/\nsum(rate(temporal_cloud_v0_replication_lag_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "legendFormat": "{{ temporal_namespace }} - p50", + "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(temporal_cloud_v0_replication_lag_bucket{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace, le))", + "hide": false, + "legendFormat": "{{ temporal_namespace }} - p99", + "range": true, + "refId": "B" } ], - "title": "Workflow Success", + "title": "Replication Lag", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, - "description": "", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -819,6 +971,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -856,9 +1009,9 @@ "h": 9, "w": 12, "x": 12, - "y": 21 + "y": 30 }, - "id": 41, + "id": 46, "options": { "legend": { "calcs": [ @@ -877,6 +1030,7 @@ "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { @@ -884,34 +1038,49 @@ "uid": "${datasource}" }, "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(temporal_cloud_v0_workflow_timeout_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "exemplar": false, + "expr": 
"sum(rate(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace, namespace_mode)", + "instant": false, "interval": "", - "legendFormat": "{{temporal_namespace}}", + "legendFormat": "{{temporal_namespace}} - {{ namespace_mode }}", "range": true, "refId": "A" } ], - "title": "Workflow Timeouts", + "title": "Actions", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 40, + "panels": [], + "title": "Schedules", + "type": "row" + }, { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, - "description": "", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -920,6 +1089,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -957,9 +1127,9 @@ "h": 9, "w": 12, "x": 0, - "y": 30 + "y": 40 }, - "id": 12, + "id": 11, "options": { "legend": { "calcs": [ @@ -978,39 +1148,43 @@ "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_workflow_failed_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "expr": "sum(rate(temporal_cloud_v0_schedule_action_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", + "range": true, "refId": "A" } ], - "title": "Workflow Failed", + "title": "Successful Schedule Actions", "type": "timeseries" }, { "datasource": { "type": 
"prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, - "description": "", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -1019,6 +1193,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1054,11 +1229,11 @@ }, "gridPos": { "h": 9, - "w": 12, + "w": 4, "x": 12, - "y": 30 + "y": 40 }, - "id": 13, + "id": 42, "options": { "legend": { "calcs": [ @@ -1077,39 +1252,29 @@ "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_workflow_continued_as_new_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "expr": "sum(rate(temporal_cloud_v0_schedule_buffer_overruns_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", + "range": true, "refId": "A" } ], - "title": "Workflow Continued as New", + "title": "Schedule Buffer Overruns", "type": "timeseries" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 39 - }, - "id": 40, - "panels": [], - "title": "Schedules", - "type": "row" - }, { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "fieldConfig": { "defaults": { @@ -1117,11 +1282,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, 
"gradientMode": "none", @@ -1130,6 +1297,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1165,11 +1333,11 @@ }, "gridPos": { "h": 9, - "w": 12, - "x": 0, + "w": 4, + "x": 16, "y": 40 }, - "id": 11, + "id": 43, "options": { "legend": { "calcs": [ @@ -1188,6 +1356,7 @@ "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { @@ -1196,20 +1365,20 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_schedule_action_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "expr": "sum(rate(temporal_cloud_v0_schedule_missed_catchup_window_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", "range": true, "refId": "A" } ], - "title": "Successful Schedule Actions", + "title": "Schedule Missed Catchup Windows", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "fieldConfig": { "defaults": { @@ -1217,11 +1386,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -1230,6 +1401,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1266,10 +1438,10 @@ "gridPos": { "h": 9, "w": 4, - "x": 12, + "x": 20, "y": 40 }, - "id": 42, + "id": 44, "options": { "legend": { "calcs": [ @@ -1288,6 +1460,7 @@ "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { @@ -1296,20 +1469,33 @@ }, "editorMode": "code", "exemplar": false, - "expr": 
"sum(rate(temporal_cloud_v0_schedule_buffer_overruns_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "expr": "sum(rate(temporal_cloud_v0_schedule_rate_limited_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", "range": true, "refId": "A" } ], - "title": "Schedule Buffer Overruns", + "title": "Schedules Rate Limited", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 49 + }, + "id": 19, + "panels": [], + "title": "Temporal Cloud gRPC Requests", + "type": "row" + }, { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "fieldConfig": { "defaults": { @@ -1317,11 +1503,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -1330,6 +1518,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1365,11 +1554,11 @@ }, "gridPos": { "h": 9, - "w": 4, - "x": 16, - "y": 40 + "w": 12, + "x": 0, + "y": 50 }, - "id": 43, + "id": 6, "options": { "legend": { "calcs": [ @@ -1388,6 +1577,7 @@ "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { @@ -1395,21 +1585,21 @@ "uid": "${datasource}" }, "editorMode": "code", - "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_schedule_missed_catchup_window_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "exemplar": true, + "expr": "sum(rate(temporal_cloud_v0_frontend_service_request_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace,operation)", "interval": "", - 
"legendFormat": "{{temporal_namespace}}", + "legendFormat": "{{temporal_namespace}} - {{operation}}", "range": true, "refId": "A" } ], - "title": "Schedule Missed Catchup Windows", + "title": "Frontend Service Requests", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "fieldConfig": { "defaults": { @@ -1417,11 +1607,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -1430,6 +1622,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1465,11 +1658,11 @@ }, "gridPos": { "h": 9, - "w": 4, - "x": 20, - "y": 40 + "w": 12, + "x": 12, + "y": 50 }, - "id": 44, + "id": 8, "options": { "legend": { "calcs": [ @@ -1488,6 +1681,7 @@ "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { @@ -1495,41 +1689,42 @@ "uid": "${datasource}" }, "editorMode": "code", - "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_schedule_rate_limited_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "exemplar": true, + "expr": "sum(rate(temporal_cloud_v0_frontend_service_error_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace,operation)", + "hide": false, "interval": "", - "legendFormat": "{{temporal_namespace}}", + "legendFormat": "{{temporal_namespace}} - {{operation}}", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_EXTERNAL_METRICS}" + }, + "editorMode": "code", + "expr": "sum(rate(temporal_cloud_v0_resource_exhausted_error_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by 
(temporal_namespace,operation,resource_exhausted_cause)", + "hide": false, + "instant": false, + "legendFormat": "Rate Limited - {{ resource_exhausted_cause }} / {{temporal_namespace}}", + "range": true, + "refId": "B" } ], - "title": "Schedules Rate Limited", + "title": "Frontend Service Errors", "type": "timeseries" }, { "collapsed": false, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 49 + "y": 59 }, - "id": 19, + "id": 36, "panels": [], - "targets": [ - { - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "refId": "A" - } - ], - "title": "Temporal Service Requests", + "title": "Temporal Service Latency", "type": "row" }, { @@ -1543,11 +1738,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -1556,6 +1753,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1585,57 +1783,67 @@ "value": 80 } ] - } + }, + "unit": "s" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 50 + "y": 60 }, - "id": 6, + "id": 30, "options": { "legend": { "calcs": [ "lastNotNull", - "min", + "mean", "max" ], "displayMode": "table", "placement": "bottom", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true + "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(temporal_cloud_v0_frontend_service_request_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace,operation)", - "interval": "", - "legendFormat": 
"{{temporal_namespace}} - {{operation}}", + "expr": "histogram_quantile(0.5, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"StartWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", + "legendFormat": "{{ temporal_namespace }} - 50th Percentile", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"StartWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", + "hide": false, + "legendFormat": "{{ temporal_namespace }} - 99th Percentile", + "range": true, + "refId": "B" } ], - "title": "Frontend Service Requests", + "title": "StartWorkflowExecution Latency", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "fieldConfig": { "defaults": { @@ -1643,11 +1851,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -1656,6 +1866,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1685,64 +1896,63 @@ "value": 80 } ] - } + }, + "unit": "s" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 50 + "h": 10, + "w": 8, + "x": 8, + "y": 60 }, - "id": 8, + "id": 1, "options": { "legend": { "calcs": [ "lastNotNull", - "min", + "mean", "max" ], "displayMode": "table", "placement": "bottom", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true + "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, + "pluginVersion": "11.4.0", 
"targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "exemplar": true, - "expr": "sum(rate(temporal_cloud_v0_frontend_service_error_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace,operation)", - "interval": "", - "legendFormat": "{{temporal_namespace}} - {{operation}}", + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"SignalWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", + "legendFormat": "{{ temporal_namespace }} - 50th Percentile", + "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_EXTERNAL_METRICS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"SignalWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", + "hide": false, + "legendFormat": "{{ temporal_namespace }} - 99th Percentile", + "range": true, + "refId": "B" } ], - "title": "Frontend Service Errors", + "title": "SignalWorkflowExecution Latency", "type": "timeseries" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 59 - }, - "id": 36, - "panels": [], - "title": "Temporal Service Latency", - "type": "row" - }, { "datasource": { "type": "prometheus", @@ -1754,11 +1964,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -1767,6 +1979,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1804,10 +2017,10 @@ "gridPos": { "h": 10, "w": 8, - "x": 0, + "x": 16, "y": 60 
}, - "id": 30, + "id": 35, "options": { "legend": { "calcs": [ @@ -1824,14 +2037,15 @@ "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"StartWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", + "expr": "histogram_quantile(0.5, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"SignalWithStartWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", "legendFormat": "{{ temporal_namespace }} - 50th Percentile", "range": true, "refId": "A" @@ -1842,32 +2056,48 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"StartWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", + "expr": "histogram_quantile(0.99, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"SignalWithStartWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", "hide": false, "legendFormat": "{{ temporal_namespace }} - 99th Percentile", "range": true, "refId": "B" } ], - "title": "StartWorkflowExecution Latency", + "title": "SignalWithStartWorkflowExecution Latency", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 70 + }, + "id": 23, + "panels": [], + "title": "Workers and Tasks", + "type": "row" + }, { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, + "description": "", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + 
"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -1876,6 +2106,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1905,78 +2136,71 @@ "value": 80 } ] - }, - "unit": "s" + } }, "overrides": [] }, "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 60 + "h": 9, + "w": 12, + "x": 0, + "y": 71 }, - "id": 1, + "id": 24, "options": { "legend": { "calcs": [ "lastNotNull", - "mean", + "min", "max" ], "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Max", + "sortDesc": true }, "tooltip": { "mode": "single", "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"SignalWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", - "legendFormat": "{{ temporal_namespace }} - 50th Percentile", - "range": true, + "exemplar": false, + "expr": "sum(rate(temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "interval": "", + "legendFormat": "{{temporal_namespace}}", "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"SignalWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", - "hide": false, - "legendFormat": "{{ temporal_namespace }} - 99th Percentile", - "range": true, - "refId": "B" } ], - "title": 
"SignalWorkflowExecution Latency", + "title": "Worker Poll Success", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, + "description": "", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -1985,6 +2209,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2014,92 +2239,56 @@ "value": 80 } ] - }, - "unit": "s" + } }, "overrides": [] }, "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 60 + "h": 9, + "w": 12, + "x": 12, + "y": 71 }, - "id": 35, + "id": 26, "options": { "legend": { "calcs": [ "lastNotNull", - "mean", + "min", "max" ], "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Max", + "sortDesc": true }, "tooltip": { "mode": "single", "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"SignalWithStartWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", - "legendFormat": "{{ temporal_namespace }} - 50th Percentile", - "range": true, + "exemplar": false, + "expr": "sum(rate(temporal_cloud_v0_poll_timeout_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "interval": "", + "legendFormat": "{{temporal_namespace}}", "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, 
sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"SignalWithStartWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", - "hide": false, - "legendFormat": "{{ temporal_namespace }} - 99th Percentile", - "range": true, - "refId": "B" } ], - "title": "SignalWithStartWorkflowExecution Latency", + "title": "Worker Poll Timeout", "type": "timeseries" }, - { - "collapsed": false, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 70 - }, - "id": 23, - "panels": [], - "targets": [ - { - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "refId": "A" - } - ], - "title": "Workers and Tasks", - "type": "row" - }, { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "description": "", "fieldConfig": { @@ -2108,11 +2297,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -2121,6 +2312,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2158,9 +2350,9 @@ "h": 9, "w": 12, "x": 0, - "y": 71 + "y": 80 }, - "id": 24, + "id": 25, "options": { "legend": { "calcs": [ @@ -2179,26 +2371,27 @@ "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "exemplar": true, + "expr": "sum(rate(temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", 
"legendFormat": "{{temporal_namespace}}", "refId": "A" } ], - "title": "Worker Poll Success", + "title": "Worker Poll Success Sync", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "description": "", "fieldConfig": { @@ -2207,11 +2400,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -2220,6 +2415,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2257,9 +2453,9 @@ "h": 9, "w": 12, "x": 12, - "y": 71 + "y": 80 }, - "id": 26, + "id": 27, "options": { "legend": { "calcs": [ @@ -2278,6 +2474,7 @@ "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { @@ -2285,19 +2482,19 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_poll_timeout_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "expr": "sum(rate(temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace) - sum(rate(temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", "refId": "A" } ], - "title": "Worker Poll Timeout", + "title": "Worker Poll Success Async", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "description": "", "fieldConfig": { @@ -2306,11 +2503,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + 
"barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -2319,6 +2518,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2348,7 +2548,8 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, @@ -2356,9 +2557,9 @@ "h": 9, "w": 12, "x": 0, - "y": 80 + "y": 89 }, - "id": 25, + "id": 37, "options": { "legend": { "calcs": [ @@ -2377,26 +2578,29 @@ "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "sum(rate(temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "expr": "sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n/\nsum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )", "interval": "", "legendFormat": "{{temporal_namespace}}", + "range": true, "refId": "A" } ], - "title": "Worker Poll Success Sync", + "title": "Sync Match Rate", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "description": "", "fieldConfig": { @@ -2405,11 +2609,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -2418,6 +2624,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2455,9 +2662,9 @@ "h": 9, "w": 12, "x": 12, - "y": 80 + "y": 89 }, - "id": 27, + "id": 38, "options": { "legend": { 
"calcs": [ @@ -2476,26 +2683,42 @@ "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace) - sum(rate(temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "editorMode": "code", + "exemplar": true, + "expr": "(\n (\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n /\n (\n (\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_timeout_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n)", "interval": "", "legendFormat": "{{temporal_namespace}}", + "range": true, "refId": "A" } ], - "title": "Worker Poll Success Async", + "title": "Poll Success Rate", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 98 + }, + "id": 17, + "panels": [], + "title": "State Transitions", + "type": "row" + }, { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "description": "", "fieldConfig": { @@ -2504,11 +2727,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", 
"axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", @@ -2517,6 +2742,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2546,8 +2772,7 @@ "value": 80 } ] - }, - "unit": "percentunit" + } }, "overrides": [] }, @@ -2555,9 +2780,9 @@ "h": 9, "w": 12, "x": 0, - "y": 89 + "y": 99 }, - "id": 37, + "id": 2, "options": { "legend": { "calcs": [ @@ -2576,64 +2801,33 @@ "sort": "none" } }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "editorMode": "code", "exemplar": true, - "expr": "sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n/\nsum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )", + "expr": "sum(rate(temporal_cloud_v0_state_transition_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", - "range": true, "refId": "A" } ], - "title": "Sync Match Rate", + "title": "State Transitions", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "description": "", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - 
"group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "mode": "continuous-GrYlRd" }, "mappings": [], "thresholds": { @@ -2648,87 +2842,216 @@ "value": 80 } ] - } + }, + "unit": "short" }, "overrides": [] }, "gridPos": { "h": 9, - "w": 12, + "w": 4, "x": 12, - "y": 89 + "y": 99 }, - "id": 38, + "id": 10, "options": { - "legend": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { "calcs": [ - "lastNotNull", - "min", - "max" + "lastNotNull" ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true + "fields": "", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "sum(increase(temporal_cloud_v0_state_transition_count{temporal_namespace=~\"$temporal_namespace\"}[30d])) by (temporal_namespace)", + "interval": "", + "legendFormat": "{{temporal_namespace}}", + "refId": "A" + } + ], + "title": "30 Day State Transitions", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_EXTERNAL_METRICS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 4, + "x": 16, + "y": 99 + }, + "id": 14, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": 
false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "sum(increase(temporal_cloud_v0_state_transition_count{temporal_namespace=~\"$temporal_namespace\"}[7d])) by (temporal_namespace)", + "interval": "", + "legendFormat": "{{temporal_namespace}}", + "refId": "A" } + ], + "title": "7 Day State Transitions", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_EXTERNAL_METRICS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] }, + "gridPos": { + "h": 9, + "w": 4, + "x": 20, + "y": 99 + }, + "id": 15, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "editorMode": "code", "exemplar": true, - "expr": "(\n (\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n /\n (\n (\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n 
temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_timeout_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n)", + "expr": "sum(increase(temporal_cloud_v0_state_transition_count{temporal_namespace=~\"$temporal_namespace\"}[1d])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", - "range": true, "refId": "A" } ], - "title": "Poll Success Rate", - "type": "timeseries" + "title": "1 Day State Transitions", + "type": "stat" } ], - "refresh": false, - "schemaVersion": 37, - "style": "dark", + "refresh": "", + "schemaVersion": 40, "tags": [], "templating": { "list": [ { - "current": { - "selected": true, - "text": "default", - "value": "default" - }, - "hide": 0, + "current": {}, "includeAll": false, "label": "datasource", - "multi": false, "name": "datasource", "options": [], "query": "prometheus", - "queryValue": "", "refresh": 1, "regex": "", - "skipUrlSync": false, "type": "datasource" }, { "current": {}, "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "definition": "label_values(temporal_namespace)", - "hide": 0, "includeAll": false, "multi": true, "name": "temporal_namespace", @@ -2739,14 +3062,12 @@ }, "refresh": 1, "regex": "", - "skipUrlSync": false, - "sort": 0, "type": "query" } ] }, "time": { - "from": "now-1h", + "from": "now-24h", "to": "now" }, "timepicker": {}, From 195fc03dd3a23720b6318213d4df326d15732d97 Mon Sep 17 00:00:00 2001 From: Tim Simmons Date: Wed, 28 May 2025 10:07:17 -0500 Subject: [PATCH 2/7] move sync match rate up a bit, add descriptions adds some tooltip descriptions for key metrics with links to documentation --- cloud/temporal_cloud.json | 117 ++++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 55 deletions(-) diff --git a/cloud/temporal_cloud.json 
b/cloud/temporal_cloud.json index 48a3ed6..c32944c 100644 --- a/cloud/temporal_cloud.json +++ b/cloud/temporal_cloud.json @@ -82,6 +82,7 @@ "type": "prometheus", "uid": "${datasource}" }, + "description": "Approximate count of Temporal Cloud Actions per-second. Use `namespace_mode` to filter for active/standby actions for multi-region namespaces. ", "fieldConfig": { "defaults": { "color": { @@ -837,6 +838,7 @@ "type": "prometheus", "uid": "${datasource}" }, + "description": "The transmission delay of Workflow updates and history events from the primary to the replica. [more info](https://docs.temporal.io/cloud/high-availability/monitor#metrics)", "fieldConfig": { "defaults": { "color": { @@ -1276,29 +1278,33 @@ "type": "prometheus", "uid": "${DS_EXTERNAL_METRICS}" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", + "fieldCon)) by (temporal_namespace)", + "interval": "", + "legendFormat": "{{temporal_namespace}}", + "range": true, + "refId": "A" + } + ], + "title": "Schedule Buffer Overruns", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_EXTERNAL_METRICS}" + }, + "fieldCon)) by (temporal_namespace)", + "interval": "", + "legendFormat": "{{temporal_namespace}}", + "range": true, + "refId": "A" + } + ], + "title": "Schedule Buffer Overruns", + "type": "timeseries" + }, + { + "datasource":eInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -2084,7 +2090,7 @@ "type": "prometheus", "uid": "${DS_EXTERNAL_METRICS}" }, - "description": "", + "description": "The Sync Match Rate 
measures the rate of Tasks that can be delivered to Workers without having to be persisted (Workers are up and available to pick them up) to the rate of all delivered Tasks. [more info](https://docs.temporal.io/production-deployment/cloud/worker-health#sync-match-rate)", "fieldConfig": { "defaults": { "color": { @@ -2136,7 +2142,8 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, @@ -2146,7 +2153,7 @@ "x": 0, "y": 71 }, - "id": 24, + "id": 37, "options": { "legend": { "calcs": [ @@ -2172,14 +2179,16 @@ "type": "prometheus", "uid": "${datasource}" }, - "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "editorMode": "code", + "exemplar": true, + "expr": "sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n/\nsum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )", "interval": "", "legendFormat": "{{temporal_namespace}}", + "range": true, "refId": "A" } ], - "title": "Worker Poll Success", + "title": "Sync Match Rate", "type": "timeseries" }, { @@ -2249,7 +2258,7 @@ "x": 12, "y": 71 }, - "id": 26, + "id": 38, "options": { "legend": { "calcs": [ @@ -2275,14 +2284,16 @@ "type": "prometheus", "uid": "${datasource}" }, - "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_poll_timeout_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "editorMode": "code", + "exemplar": true, + "expr": "(\n (\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n 
)\n )\n )\n /\n (\n (\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_timeout_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n)", "interval": "", "legendFormat": "{{temporal_namespace}}", + "range": true, "refId": "A" } ], - "title": "Worker Poll Timeout", + "title": "Poll Success Rate", "type": "timeseries" }, { @@ -2352,7 +2363,7 @@ "x": 0, "y": 80 }, - "id": 25, + "id": 24, "options": { "legend": { "calcs": [ @@ -2378,14 +2389,14 @@ "type": "prometheus", "uid": "${datasource}" }, - "exemplar": true, - "expr": "sum(rate(temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "exemplar": false, + "expr": "sum(rate(temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", "refId": "A" } ], - "title": "Worker Poll Success Sync", + "title": "Worker Poll Success", "type": "timeseries" }, { @@ -2455,7 +2466,7 @@ "x": 12, "y": 80 }, - "id": 27, + "id": 26, "options": { "legend": { "calcs": [ @@ -2482,13 +2493,13 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace) - sum(rate(temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "expr": "sum(rate(temporal_cloud_v0_poll_timeout_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", 
"legendFormat": "{{temporal_namespace}}", "refId": "A" } ], - "title": "Worker Poll Success Async", + "title": "Worker Poll Timeout", "type": "timeseries" }, { @@ -2548,8 +2559,7 @@ "value": 80 } ] - }, - "unit": "percentunit" + } }, "overrides": [] }, @@ -2559,7 +2569,7 @@ "x": 0, "y": 89 }, - "id": 37, + "id": 25, "options": { "legend": { "calcs": [ @@ -2585,16 +2595,14 @@ "type": "prometheus", "uid": "${datasource}" }, - "editorMode": "code", "exemplar": true, - "expr": "sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n/\nsum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )", + "expr": "sum(rate(temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", - "range": true, "refId": "A" } ], - "title": "Sync Match Rate", + "title": "Worker Poll Success Sync", "type": "timeseries" }, { @@ -2664,7 +2672,7 @@ "x": 12, "y": 89 }, - "id": 38, + "id": 27, "options": { "legend": { "calcs": [ @@ -2690,16 +2698,14 @@ "type": "prometheus", "uid": "${datasource}" }, - "editorMode": "code", - "exemplar": true, - "expr": "(\n (\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n /\n (\n (\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n +\n 
sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_timeout_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n)", + "exemplar": false, + "expr": "sum(rate(temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace) - sum(rate(temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", - "range": true, "refId": "A" } ], - "title": "Poll Success Rate", + "title": "Worker Poll Success Async", "type": "timeseries" }, { @@ -3029,7 +3035,7 @@ "type": "stat" } ], - "refresh": "", + "refresh": false, "schemaVersion": 40, "tags": [], "templating": { @@ -3067,12 +3073,13 @@ ] }, "time": { - "from": "now-24h", + "from": "now-1h", "to": "now" }, "timepicker": {}, "timezone": "", "title": "Temporal Cloud External Metrics", + "uid": "fem3anbu421hcc", "version": 1, "weekStart": "" } From 15efb6c0a825feb4b6a75cdef1f33cf1eacfad03 Mon Sep 17 00:00:00 2001 From: Tim Simmons Date: Wed, 28 May 2025 10:16:55 -0500 Subject: [PATCH 3/7] make actions panel active only --- cloud/temporal_cloud.json | 122 ++++++++++++++++++-------------------- 1 file changed, 58 insertions(+), 64 deletions(-) diff --git a/cloud/temporal_cloud.json b/cloud/temporal_cloud.json index c32944c..f4d1447 100644 --- a/cloud/temporal_cloud.json +++ b/cloud/temporal_cloud.json @@ -172,7 +172,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "expr": "sum(rate(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\", namespace_mode=\"active\"}[$__rate_interval])) by (temporal_namespace)", "instant": false, "interval": "", "legendFormat": "{{temporal_namespace}}", @@ -1278,33 +1278,29 @@ "type": "prometheus", "uid": 
"${DS_EXTERNAL_METRICS}" }, - "fieldCon)) by (temporal_namespace)", - "interval": "", - "legendFormat": "{{temporal_namespace}}", - "range": true, - "refId": "A" - } - ], - "title": "Schedule Buffer Overruns", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_EXTERNAL_METRICS}" - }, - "fieldCon)) by (temporal_namespace)", - "interval": "", - "legendFormat": "{{temporal_namespace}}", - "range": true, - "refId": "A" - } - ], - "title": "Schedule Buffer Overruns", - "type": "timeseries" - }, - { - "datasource":eInterpolation": "linear", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -1556,43 +1552,41 @@ ] } }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 50 - }, - "id": 6, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "min", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true + "overrides": [ "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(temporal_cloud_v0_frontend_service_request_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace,operation)", + "overrides": [ "color": "green", + "value": null + }, + { + "color": 
"red", + "value": 80 + } + ] + } + }, + "overrides": [ "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ "color": "green", + "value": null + }, + { + tend_service_request_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace,operation)", "interval": "", "legendFormat": "{{temporal_namespace}} - {{operation}}", "range": true, From 602d765af8794d568ae958ddb27b25e7977252d0 Mon Sep 17 00:00:00 2001 From: Tim Simmons Date: Wed, 28 May 2025 14:13:24 -0500 Subject: [PATCH 4/7] move poll sync/async closer to syncmatch --- cloud/temporal_cloud.json | 126 +++++++++++++++++++------------------- 1 file changed, 62 insertions(+), 64 deletions(-) diff --git a/cloud/temporal_cloud.json b/cloud/temporal_cloud.json index f4d1447..8b15c77 100644 --- a/cloud/temporal_cloud.json +++ b/cloud/temporal_cloud.json @@ -849,19 +849,16 @@ "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", + "title": "Multi-region Namespaces", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "The transmission delay of Workflow updates and history events from the primary to the replica. 
[more info](https://docs.temporal.io/cloud/high-availability/monitor#metrics)", + "fieldConfig": {neInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -1552,41 +1549,43 @@ ] } }, - "overrides": [ "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 50 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true }, - "overrides": [ "color": "green", - "value": null - }, - { - tend_service_request_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace,operation)", + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(temporal_cloud_v0_frontend_service_request_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace,operation)", "interval": "", "legendFormat": "{{temporal_namespace}} - {{operation}}", "range": true, @@ -2252,7 +2251,7 @@ "x": 12, "y": 71 }, - "id": 38, + "id": 25, "options": { "legend": { "calcs": [ @@ -2278,16 +2277,14 @@ "type": "prometheus", "uid": "${datasource}" }, - "editorMode": "code", "exemplar": true, - "expr": "(\n (\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n 
temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n /\n (\n (\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_timeout_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n)", + "expr": "sum(rate(temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", - "range": true, "refId": "A" } ], - "title": "Poll Success Rate", + "title": "Worker Poll Success Sync", "type": "timeseries" }, { @@ -2357,7 +2354,7 @@ "x": 0, "y": 80 }, - "id": 24, + "id": 27, "options": { "legend": { "calcs": [ @@ -2384,13 +2381,13 @@ "uid": "${datasource}" }, "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "expr": "sum(rate(temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace) - sum(rate(temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", "refId": "A" } ], - "title": "Worker Poll Success", + "title": "Worker Poll Success Async", "type": "timeseries" }, { @@ -2563,7 +2560,7 @@ "x": 0, "y": 89 }, - "id": 25, + "id": 24, "options": { "legend": { "calcs": [ @@ -2589,14 +2586,14 @@ "type": "prometheus", "uid": "${datasource}" }, - "exemplar": true, - "expr": 
"sum(rate(temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "exemplar": false, + "expr": "sum(rate(temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", "refId": "A" } ], - "title": "Worker Poll Success Sync", + "title": "Worker Poll Success", "type": "timeseries" }, { @@ -2666,7 +2663,7 @@ "x": 12, "y": 89 }, - "id": 27, + "id": 38, "options": { "legend": { "calcs": [ @@ -2692,14 +2689,16 @@ "type": "prometheus", "uid": "${datasource}" }, - "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace) - sum(rate(temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "editorMode": "code", + "exemplar": true, + "expr": "(\n (\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n /\n (\n (\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_timeout_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n)", "interval": "", "legendFormat": "{{temporal_namespace}}", + "range": true, "refId": "A" } ], - "title": "Worker Poll Success Async", + "title": "Poll Success Rate", 
"type": "timeseries" }, { @@ -3074,6 +3073,5 @@ "timezone": "", "title": "Temporal Cloud External Metrics", "uid": "fem3anbu421hcc", - "version": 1, "weekStart": "" } From 404025a3076bb06b16aa64e7673a2e05fba73874 Mon Sep 17 00:00:00 2001 From: Tim Simmons Date: Thu, 29 May 2025 14:48:39 -0500 Subject: [PATCH 5/7] more tooltips More tooltips, links to docs. Added a few missing graphs. rearranged some rows. --- cloud/temporal_cloud.json | 1054 ++++++++++++++++++++++++------------- 1 file changed, 679 insertions(+), 375 deletions(-) diff --git a/cloud/temporal_cloud.json b/cloud/temporal_cloud.json index 8b15c77..57077fa 100644 --- a/cloud/temporal_cloud.json +++ b/cloud/temporal_cloud.json @@ -82,7 +82,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "description": "Approximate count of Temporal Cloud Actions per-second. Use `namespace_mode` to filter for active/standby actions for multi-region namespaces. ", + "description": "Approximate count of Temporal Cloud Actions per-second. Use `namespace_mode` to filter for active/standby actions for multi-region namespaces. 
More information about [billable actions](https://docs.temporal.io/cloud/pricing#action).", "fieldConfig": { "defaults": { "color": { @@ -243,10 +243,12 @@ "type": "prometheus", "uid": "${DS_EXTERNAL_METRICS}" }, + "editorMode": "code", "exemplar": true, - "expr": "sum(increase(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\"}[30d])) by (temporal_namespace)", + "expr": "sum(increase(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\", namespace_mode=\"active\"}[30d])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", + "range": true, "refId": "A" } ], @@ -313,10 +315,13 @@ "type": "prometheus", "uid": "${DS_EXTERNAL_METRICS}" }, - "exemplar": true, - "expr": "sum(increase(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\"}[7d])) by (temporal_namespace)", + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\", namespace_mode=\"active\"}[7d])) by (temporal_namespace)", + "instant": false, "interval": "", "legendFormat": "{{temporal_namespace}}", + "range": true, "refId": "A" } ], @@ -383,10 +388,12 @@ "type": "prometheus", "uid": "${DS_EXTERNAL_METRICS}" }, + "editorMode": "code", "exemplar": true, - "expr": "sum(increase(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\"}[1d])) by (temporal_namespace)", + "expr": "sum(increase(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\", namespace_mode=\"active\"}[1d])) by (temporal_namespace)", "interval": "", "legendFormat": "{{temporal_namespace}}", + "range": true, "refId": "A" } ], @@ -411,7 +418,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "description": "", + "description": "Workflows that successfully completed.", "fieldConfig": { "defaults": { "color": { @@ -514,7 +521,7 @@ "type": "prometheus", "uid": "${datasource}" }, - 
"description": "", + "description": "Workflows that timed out before completing execution.", "fieldConfig": { "defaults": { "color": { @@ -619,7 +626,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "description": "", + "description": "Workflows that failed before completion.", "fieldConfig": { "defaults": { "color": { @@ -722,7 +729,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "description": "", + "description": "Workflow Executions that were Continued-As-New from a past execution.", "fieldConfig": { "defaults": { "color": { @@ -820,25 +827,12 @@ "title": "Workflow Continued as New", "type": "timeseries" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 29 - }, - "id": 45, - "panels": [], - "title": "Multi-region Namespaces", - "type": "row" - }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "description": "The transmission delay of Workflow updates and history events from the primary to the replica. [more info](https://docs.temporal.io/cloud/high-availability/monitor#metrics)", + "description": "Workflows terminated before completing execution.", "fieldConfig": { "defaults": { "color": { @@ -849,16 +843,19 @@ "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", - "title": "Multi-region Namespaces", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "description": "The transmission delay of Workflow updates and history events from the primary to the replica. 
[more info](https://docs.temporal.io/cloud/high-availability/monitor#metrics)", - "fieldConfig": {neInterpolation": "linear", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -887,8 +884,7 @@ "value": 80 } ] - }, - "unit": "s" + } }, "overrides": [] }, @@ -896,19 +892,21 @@ "h": 9, "w": 12, "x": 0, - "y": 30 + "y": 29 }, - "id": 47, + "id": 48, "options": { "legend": { "calcs": [ "lastNotNull", - "mean", + "min", "max" ], "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Max", + "sortDesc": true }, "tooltip": { "mode": "single", @@ -923,32 +921,23 @@ "uid": "${DS_EXTERNAL_METRICS}" }, "editorMode": "code", - "expr": "sum(rate(temporal_cloud_v0_replication_lag_sum{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)\n/\nsum(rate(temporal_cloud_v0_replication_lag_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", - "legendFormat": "{{ temporal_namespace }} - p50", + "exemplar": false, + "expr": "sum(rate(temporal_cloud_v0_workflow_terminate_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "interval": "", + "legendFormat": "{{temporal_namespace}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum(rate(temporal_cloud_v0_replication_lag_bucket{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace, le))", - "hide": false, - "legendFormat": "{{ temporal_namespace }} - p99", - "range": true, - "refId": "B" } ], - "title": "Replication Lag", + "title": 
"Workflow Terminations", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "${DS_EXTERNAL_METRICS}" + "uid": "${datasource}" }, + "description": "Workflows canceled before completing execution.", "fieldConfig": { "defaults": { "color": { @@ -1008,9 +997,9 @@ "h": 9, "w": 12, "x": 12, - "y": 30 + "y": 29 }, - "id": 46, + "id": 49, "options": { "legend": { "calcs": [ @@ -1034,19 +1023,18 @@ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace, namespace_mode)", - "instant": false, + "expr": "sum(rate(temporal_cloud_v0_workflow_cancel_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", "interval": "", - "legendFormat": "{{temporal_namespace}} - {{ namespace_mode }}", + "legendFormat": "{{temporal_namespace}}", "range": true, "refId": "A" } ], - "title": "Actions", + "title": "Workflow Cancellations", "type": "timeseries" }, { @@ -1055,7 +1043,7 @@ "h": 1, "w": 24, "x": 0, - "y": 39 + "y": 38 }, "id": 40, "panels": [], @@ -1065,8 +1053,9 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_EXTERNAL_METRICS}" + "uid": "${datasource}" }, + "description": "Successful execution of a Scheduled Workflow.", "fieldConfig": { "defaults": { "color": { @@ -1126,7 +1115,7 @@ "h": 9, "w": 12, "x": 0, - "y": 40 + "y": 39 }, "id": 11, "options": { @@ -1152,7 +1141,7 @@ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "editorMode": "code", "exemplar": false, @@ -1169,8 +1158,9 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_EXTERNAL_METRICS}" + "uid": "${datasource}" }, + "description": "When average schedule run length is greater than average schedule interval while a buffer_all overlap policy is configured.", 
"fieldConfig": { "defaults": { "color": { @@ -1230,7 +1220,7 @@ "h": 9, "w": 4, "x": 12, - "y": 40 + "y": 39 }, "id": 42, "options": { @@ -1256,7 +1246,7 @@ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "editorMode": "code", "exemplar": false, @@ -1273,8 +1263,9 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_EXTERNAL_METRICS}" + "uid": "${datasource}" }, + "description": "Skipped Scheduled executions when Workflows were delayed longer than the catchup window.", "fieldConfig": { "defaults": { "color": { @@ -1334,7 +1325,7 @@ "h": 9, "w": 4, "x": 16, - "y": 40 + "y": 39 }, "id": 43, "options": { @@ -1360,7 +1351,7 @@ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "editorMode": "code", "exemplar": false, @@ -1377,8 +1368,9 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_EXTERNAL_METRICS}" + "uid": "${datasource}" }, + "description": "Workflows that were delayed due to exceeding a rate limit.", "fieldConfig": { "defaults": { "color": { @@ -1438,7 +1430,7 @@ "h": 9, "w": 4, "x": 20, - "y": 40 + "y": 39 }, "id": 44, "options": { @@ -1464,7 +1456,7 @@ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "editorMode": "code", "exemplar": false, @@ -1484,18 +1476,19 @@ "h": 1, "w": 24, "x": 0, - "y": 49 + "y": 48 }, - "id": 19, + "id": 23, "panels": [], - "title": "Temporal Cloud gRPC Requests", + "title": "Workers and Tasks", "type": "row" }, { "datasource": { "type": "prometheus", - "uid": "${DS_EXTERNAL_METRICS}" + "uid": "${datasource}" }, + "description": "The Sync Match Rate measures the rate of Tasks that can be delivered to Workers without having to be persisted (Workers are up and available to pick them up) to the rate of all delivered Tasks. 
[more info](https://docs.temporal.io/production-deployment/cloud/worker-health#sync-match-rate)\n\nShould always be near 100%.", "fieldConfig": { "defaults": { "color": { @@ -1547,7 +1540,8 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, @@ -1555,9 +1549,9 @@ "h": 9, "w": 12, "x": 0, - "y": 50 + "y": 49 }, - "id": 6, + "id": 37, "options": { "legend": { "calcs": [ @@ -1581,156 +1575,197 @@ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "editorMode": "code", "exemplar": true, - "expr": "sum(rate(temporal_cloud_v0_frontend_service_request_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace,operation)", + "expr": "sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n/\nsum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )", "interval": "", - "legendFormat": "{{temporal_namespace}} - {{operation}}", + "legendFormat": "{{temporal_namespace}}", "range": true, "refId": "A" } ], - "title": "Frontend Service Requests", + "title": "Sync Match Rate", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "${DS_EXTERNAL_METRICS}" + "uid": "${datasource}" }, + "description": "Tasks that are successfully matched to a poller.", "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - 
"type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 50 - }, - "id": 8, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "min", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.4.0", - "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(temporal_cloud_v0_frontend_service_error_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace,operation)", - "hide": false, - "interval": "", - "legendFormat": "{{temporal_namespace}} - {{operation}}", - "range": true, - "refId": "A" - }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_EXTERNAL_METRICS}" - }, - "editorMode": "code", - "expr": "sum(rate(temporal_cloud_v0_resource_exhausted_error_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace,operation,resource_exhausted_cause)", - "hide": false, - "instant": false, - "legendFormat": "Rate Limited - {{ resource_exhausted_cause }} / {{temporal_namespace}}", - "range": true, - "refId": "B" - } - ], - "title": "Frontend Service Errors", - "type": "timeseries" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 59 - }, - "id": 36, - "panels": [], - "title": "Temporal Service Latency", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + 
{ + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { + { +e": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_EXTERNAL_METRICS}" + }, + "exemplar": false, + "expr": "sum(rate(temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "interval": "", + "legendFormat": "{{temporal_namespace}}", + "refId": "A" + } + ], + "title": "Worker Poll Success", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", "uid": "${datasource}" }, + "description": "Tasks that are successfully sync matched to a poller.", "fieldConfig": { "defaults": { "color": { @@ -1782,28 +1817,29 @@ "value": 80 } ] - }, - "unit": "s" + } }, "overrides": [] }, "gridPos": { - "h": 10, - "w": 8, + "h": 9, + "w": 12, "x": 0, - "y": 60 + "y": 58 }, - "id": 30, + "id": 25, "options": { "legend": { "calcs": [ "lastNotNull", - "mean", + "min", "max" ], "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Max", + "sortDesc": true }, "tooltip": { "mode": "single", @@ -1817,33 +1853,22 @@ "type": "prometheus", "uid": 
"${DS_EXTERNAL_METRICS}" }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"StartWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", - "legendFormat": "{{ temporal_namespace }} - 50th Percentile", - "range": true, + "exemplar": true, + "expr": "sum(rate(temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "interval": "", + "legendFormat": "{{temporal_namespace}}", "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"StartWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", - "hide": false, - "legendFormat": "{{ temporal_namespace }} - 99th Percentile", - "range": true, - "refId": "B" } ], - "title": "StartWorkflowExecution Latency", + "title": "Worker Poll Success Sync", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "${DS_EXTERNAL_METRICS}" + "uid": "${datasource}" }, + "description": "Tasks that are successfully async matched to a poller.", "fieldConfig": { "defaults": { "color": { @@ -1895,28 +1920,29 @@ "value": 80 } ] - }, - "unit": "s" + } }, "overrides": [] }, "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 60 + "h": 9, + "w": 12, + "x": 12, + "y": 58 }, - "id": 1, + "id": 27, "options": { "legend": { "calcs": [ "lastNotNull", - "mean", + "min", "max" ], "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Max", + "sortDesc": true }, "tooltip": { "mode": "single", @@ -1928,28 +1954,124 @@ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "editorMode": "code", - "expr": 
"histogram_quantile(0.5, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"SignalWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", - "legendFormat": "{{ temporal_namespace }} - 50th Percentile", + "exemplar": false, + "expr": "sum(rate(temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace) - sum(rate(temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "interval": "", + "legendFormat": "{{temporal_namespace}}", "range": true, "refId": "A" + } + ], + "title": "Worker Poll Success Async", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Successful task matching to poller\n/\nAll polling requests (including timeouts)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 67 + }, + "id": 38, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", 
+ "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_EXTERNAL_METRICS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"SignalWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", - "hide": false, - "legendFormat": "{{ temporal_namespace }} - 99th Percentile", + "exemplar": true, + "expr": "(\n (\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n /\n (\n (\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_timeout_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n)", + "interval": "", + "legendFormat": "{{temporal_namespace}}", "range": true, - "refId": "B" + "refId": "A" } ], - "title": "SignalWorkflowExecution Latency", + "title": "Poll Success Rate", "type": "timeseries" }, { @@ -1957,6 +2079,7 @@ "type": "prometheus", "uid": "${datasource}" }, + "description": "When no tasks are available for a poller before timing out. 
If this is high, you may have too many pollers.", "fieldConfig": { "defaults": { "color": { @@ -2008,28 +2131,29 @@ "value": 80 } ] - }, - "unit": "s" + } }, "overrides": [] }, "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 60 + "h": 9, + "w": 12, + "x": 12, + "y": 67 }, - "id": 35, + "id": 26, "options": { "legend": { "calcs": [ "lastNotNull", - "mean", + "min", "max" ], "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Max", + "sortDesc": true }, "tooltip": { "mode": "single", @@ -2043,47 +2167,140 @@ "type": "prometheus", "uid": "${DS_EXTERNAL_METRICS}" }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"SignalWithStartWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", - "legendFormat": "{{ temporal_namespace }} - 50th Percentile", - "range": true, + "exemplar": false, + "expr": "sum(rate(temporal_cloud_v0_poll_timeout_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "interval": "", + "legendFormat": "{{temporal_namespace}}", "refId": "A" + } + ], + "title": "Worker Poll Timeout", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 76 + }, + "id": 19, + "panels": [], + "title": "Temporal Cloud gRPC Requests", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "A count of gRPC requests received aggregated by operation.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + 
"tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 77 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"SignalWithStartWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", - "hide": false, - "legendFormat": "{{ temporal_namespace }} - 99th Percentile", + "exemplar": true, + "expr": "sum(rate(temporal_cloud_v0_frontend_service_request_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace,operation)", + "interval": "", + "legendFormat": "{{temporal_namespace}} - {{operation}}", "range": true, - "refId": "B" + "refId": "A" } ], - "title": "SignalWithStartWorkflowExecution Latency", + "title": "Frontend Service Requests", "type": "timeseries" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 70 - }, - "id": 23, - "panels": [], - "title": "Workers and Tasks", - "type": "row" - }, { "datasource": { "type": "prometheus", - "uid": 
"${DS_EXTERNAL_METRICS}" + "uid": "${datasource}" }, - "description": "The Sync Match Rate measures the rate of Tasks that can be delivered to Workers without having to be persisted (Workers are up and available to pick them up) to the rate of all delivered Tasks. [more info](https://docs.temporal.io/production-deployment/cloud/worker-health#sync-match-rate)", + "description": "Count of gRPC errors returned aggregated by operation, including resource exhausted errors.", "fieldConfig": { "defaults": { "color": { @@ -2135,18 +2352,17 @@ "value": 80 } ] - }, - "unit": "percentunit" + } }, "overrides": [] }, "gridPos": { "h": 9, "w": 12, - "x": 0, - "y": 71 + "x": 12, + "y": 77 }, - "id": 37, + "id": 8, "options": { "legend": { "calcs": [ @@ -2170,26 +2386,53 @@ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, "editorMode": "code", "exemplar": true, - "expr": "sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n/\nsum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )", + "expr": "sum(rate(temporal_cloud_v0_frontend_service_error_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace,operation)", + "hide": false, "interval": "", - "legendFormat": "{{temporal_namespace}}", + "legendFormat": "{{temporal_namespace}} - {{operation}}", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate(temporal_cloud_v0_resource_exhausted_error_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace,operation,resource_exhausted_cause)", + "hide": false, + "instant": false, + "legendFormat": "Rate Limited - {{ resource_exhausted_cause }} / {{temporal_namespace}}", + "range": 
true, + "refId": "B" } ], - "title": "Sync Match Rate", + "title": "Frontend Service Errors", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 86 + }, + "id": 36, + "panels": [], + "title": "Temporal Service Latency", + "type": "row" + }, { "datasource": { "type": "prometheus", "uid": "${DS_EXTERNAL_METRICS}" }, - "description": "", + "description": "Should generally be under 200ms", "fieldConfig": { "defaults": { "color": { @@ -2241,28 +2484,29 @@ "value": 80 } ] - } + }, + "unit": "s" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 71 + "h": 10, + "w": 8, + "x": 0, + "y": 87 }, - "id": 25, + "id": 30, "options": { "legend": { "calcs": [ "lastNotNull", - "min", + "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Max", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { @@ -2277,22 +2521,34 @@ "type": "prometheus", "uid": "${datasource}" }, - "exemplar": true, - "expr": "sum(rate(temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", - "interval": "", - "legendFormat": "{{temporal_namespace}}", + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"StartWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", + "legendFormat": "{{ temporal_namespace }} - 50th Percentile", + "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_EXTERNAL_METRICS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"StartWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", + "hide": false, + "legendFormat": "{{ temporal_namespace }} - 99th Percentile", + 
"range": true, + "refId": "B" } ], - "title": "Worker Poll Success Sync", + "title": "StartWorkflowExecution Latency", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "${DS_EXTERNAL_METRICS}" + "uid": "${datasource}" }, - "description": "", + "description": "Should generally be under 200ms", "fieldConfig": { "defaults": { "color": { @@ -2344,29 +2600,28 @@ "value": 80 } ] - } + }, + "unit": "s" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 80 + "h": 10, + "w": 8, + "x": 8, + "y": 87 }, - "id": 27, + "id": 1, "options": { "legend": { "calcs": [ "lastNotNull", - "min", + "mean", "max" ], "displayMode": "table", "placement": "bottom", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true + "showLegend": true }, "tooltip": { "mode": "single", @@ -2378,16 +2633,28 @@ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, - "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace) - sum(rate(temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", - "interval": "", - "legendFormat": "{{temporal_namespace}}", + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"SignalWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", + "legendFormat": "{{ temporal_namespace }} - 50th Percentile", + "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"SignalWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", + 
"hide": false, + "legendFormat": "{{ temporal_namespace }} - 99th Percentile", + "range": true, + "refId": "B" } ], - "title": "Worker Poll Success Async", + "title": "SignalWorkflowExecution Latency", "type": "timeseries" }, { @@ -2395,7 +2662,7 @@ "type": "prometheus", "uid": "${DS_EXTERNAL_METRICS}" }, - "description": "", + "description": "Should generally be under 200ms", "fieldConfig": { "defaults": { "color": { @@ -2447,29 +2714,28 @@ "value": 80 } ] - } + }, + "unit": "s" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 80 + "h": 10, + "w": 8, + "x": 16, + "y": 87 }, - "id": 26, + "id": 35, "options": { "legend": { "calcs": [ "lastNotNull", - "min", + "mean", "max" ], "displayMode": "table", "placement": "bottom", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true + "showLegend": true }, "tooltip": { "mode": "single", @@ -2483,22 +2749,47 @@ "type": "prometheus", "uid": "${datasource}" }, - "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_poll_timeout_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", - "interval": "", - "legendFormat": "{{temporal_namespace}}", + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"SignalWithStartWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", + "legendFormat": "{{ temporal_namespace }} - 50th Percentile", + "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_EXTERNAL_METRICS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(temporal_cloud_v0_service_latency_bucket{temporal_namespace=~\"$temporal_namespace\", operation=\"SignalWithStartWorkflowExecution\"}[$__rate_interval])) by (temporal_namespace, operation, le))", + "hide": false, + "legendFormat": "{{ temporal_namespace }} - 99th Percentile", + "range": true, + "refId": 
"B" } ], - "title": "Worker Poll Timeout", + "title": "SignalWithStartWorkflowExecution Latency", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 97 + }, + "id": 45, + "panels": [], + "title": "Multi-region Namespaces", + "type": "row" + }, { "datasource": { "type": "prometheus", - "uid": "${DS_EXTERNAL_METRICS}" + "uid": "${datasource}" }, - "description": "", + "description": "The transmission delay of Workflow updates and history events from the primary to the replica. [more info](https://docs.temporal.io/cloud/high-availability/monitor#metrics)", "fieldConfig": { "defaults": { "color": { @@ -2550,7 +2841,8 @@ "value": 80 } ] - } + }, + "unit": "s" }, "overrides": [] }, @@ -2558,21 +2850,19 @@ "h": 9, "w": 12, "x": 0, - "y": 89 + "y": 98 }, - "id": 24, + "id": 47, "options": { "legend": { "calcs": [ "lastNotNull", - "min", + "mean", "max" ], "displayMode": "table", "placement": "bottom", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true + "showLegend": true }, "tooltip": { "mode": "single", @@ -2584,16 +2874,28 @@ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "${DS_EXTERNAL_METRICS}" }, - "exemplar": false, - "expr": "sum(rate(temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", - "interval": "", - "legendFormat": "{{temporal_namespace}}", + "editorMode": "code", + "expr": "sum(rate(temporal_cloud_v0_replication_lag_sum{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)\n/\nsum(rate(temporal_cloud_v0_replication_lag_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace)", + "legendFormat": "{{ temporal_namespace }} - mean", + "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, 
sum(rate(temporal_cloud_v0_replication_lag_bucket{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace, le))", + "hide": false, + "legendFormat": "{{ temporal_namespace }} - p99", + "range": true, + "refId": "B" } ], - "title": "Worker Poll Success", + "title": "Replication Lag", "type": "timeseries" }, { @@ -2601,7 +2903,7 @@ "type": "prometheus", "uid": "${DS_EXTERNAL_METRICS}" }, - "description": "", + "description": "[Billable actions](https://docs.temporal.io/cloud/pricing#action) split across active/standby namespaces for multi region namespaces.", "fieldConfig": { "defaults": { "color": { @@ -2661,9 +2963,9 @@ "h": 9, "w": 12, "x": 12, - "y": 89 + "y": 98 }, - "id": 38, + "id": 46, "options": { "legend": { "calcs": [ @@ -2690,15 +2992,16 @@ "uid": "${datasource}" }, "editorMode": "code", - "exemplar": true, - "expr": "(\n (\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n /\n (\n (\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_success_sync_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n +\n sum by(temporal_namespace) (\n rate(\n temporal_cloud_v0_poll_timeout_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval]\n )\n )\n )\n)", + "exemplar": false, + "expr": "sum(rate(temporal_cloud_v0_total_action_count{temporal_namespace=~\"$temporal_namespace\"}[$__rate_interval])) by (temporal_namespace, namespace_mode)", + "instant": false, "interval": "", - "legendFormat": "{{temporal_namespace}}", + "legendFormat": "{{temporal_namespace}} - {{ 
namespace_mode }}", "range": true, "refId": "A" } ], - "title": "Poll Success Rate", + "title": "Actions", "type": "timeseries" }, { @@ -2707,7 +3010,7 @@ "h": 1, "w": 24, "x": 0, - "y": 98 + "y": 107 }, "id": 17, "panels": [], @@ -2719,7 +3022,7 @@ "type": "prometheus", "uid": "${DS_EXTERNAL_METRICS}" }, - "description": "", + "description": "Count of state transitions for each Namespace.", "fieldConfig": { "defaults": { "color": { @@ -2779,7 +3082,7 @@ "h": 9, "w": 12, "x": 0, - "y": 99 + "y": 108 }, "id": 2, "options": { @@ -2850,7 +3153,7 @@ "h": 9, "w": 4, "x": 12, - "y": 99 + "y": 108 }, "id": 10, "options": { @@ -2920,7 +3223,7 @@ "h": 9, "w": 4, "x": 16, - "y": 99 + "y": 108 }, "id": 14, "options": { @@ -2990,7 +3293,7 @@ "h": 9, "w": 4, "x": 20, - "y": 99 + "y": 108 }, "id": 15, "options": { @@ -3073,5 +3376,6 @@ "timezone": "", "title": "Temporal Cloud External Metrics", "uid": "fem3anbu421hcc", + "version": 13, "weekStart": "" } From 79ff6272ed22bf3558d19316a7f2136572db327e Mon Sep 17 00:00:00 2001 From: Tim Simmons Date: Thu, 29 May 2025 15:16:26 -0500 Subject: [PATCH 6/7] more tweaks --- cloud/temporal_cloud.json | 190 ++++++++++++-------------------------- 1 file changed, 58 insertions(+), 132 deletions(-) diff --git a/cloud/temporal_cloud.json b/cloud/temporal_cloud.json index 57077fa..4c2709a 100644 --- a/cloud/temporal_cloud.json +++ b/cloud/temporal_cloud.json @@ -134,7 +134,8 @@ "value": 80 } ] - } + }, + "unit": "APS" }, "overrides": [] }, @@ -1596,137 +1597,62 @@ }, "description": "Tasks that are successfully matched to a poller.", "fieldConfig": { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { - { 
- { -e": "off" - } + "defaults": { + "color": { + "mode": "palette-classic" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 49 - }, - "id": 24, - "options": { - "legend": { + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + : false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + : false, + "viz": false + }, + "insertNulls": false, + egend": { "calcs": [ "lastNotNull", "min", @@ -3376,6 +3302,6 @@ e": "off" "timezone": "", "title": "Temporal Cloud External Metrics", "uid": "fem3anbu421hcc", - "version": 13, + "version": 1, "weekStart": "" } From 7f3a9feacc956e5e0d35b0c8515d51bd58b8b000 Mon Sep 17 00:00:00 2001 From: Tim Simmons Date: Thu, 29 May 2025 15:18:45 -0500 Subject: [PATCH 7/7] fix json --- cloud/temporal_cloud.json | 53 ++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/cloud/temporal_cloud.json b/cloud/temporal_cloud.json index 4c2709a..85673d3 100644 --- a/cloud/temporal_cloud.json +++ b/cloud/temporal_cloud.json @@ -1631,28 +1631,35 
@@ "mode": "none" }, "thresholdsStyle": { - : false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - : false, - "viz": false - }, - "insertNulls": false, - egend": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 24, + "options": { + "legend": { "calcs": [ "lastNotNull", "min", @@ -3304,4 +3311,4 @@ "uid": "fem3anbu421hcc", "version": 1, "weekStart": "" -} +} \ No newline at end of file