Skip to content

Commit fb309fc

Browse files
authored
feat: Add request_duration metric for each endpoint (#634)
1 parent c5919a9 commit fb309fc

7 files changed

Lines changed: 140 additions & 10 deletions

File tree

internal/metrics/constants.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ const (
2121

2222
privatePollingRequestsMeasureName = "internal_polling_requests"
2323

24-
requestMeasureName = "requests"
24+
requestMeasureName = "requests"
25+
requestDurationMeasureName = "request_duration"
2526

2627
defaultFlushInterval = time.Minute
2728

internal/metrics/measures.go

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package metrics
22

33
import (
44
"context"
5+
"time"
56

67
"github.com/launchdarkly/ld-relay/v8/internal/logging"
78

@@ -19,9 +20,10 @@ var (
1920
//
2021
// To avoid having to put nolint:gochecknoglobals on everything here, that linter is excluded
2122
// specifically for this file in .golangci-lint.yml.
22-
connMeasure = stats.Int64(connMeasureName, "current number of connections", stats.UnitDimensionless)
23-
newConnMeasure = stats.Int64(newConnMeasureName, "total number of connections", stats.UnitDimensionless)
24-
requestMeasure = stats.Int64(requestMeasureName, "Number of hits to a route", stats.UnitDimensionless)
23+
connMeasure = stats.Int64(connMeasureName, "current number of connections", stats.UnitDimensionless)
24+
newConnMeasure = stats.Int64(newConnMeasureName, "total number of connections", stats.UnitDimensionless)
25+
requestMeasure = stats.Int64(requestMeasureName, "Number of hits to a route", stats.UnitDimensionless)
26+
requestDurationMeasure = stats.Float64(requestDurationMeasureName, "request duration in microseconds", stats.UnitDimensionless)
2527

2628
// For internal event exporter
2729
privateConnMeasure = stats.Int64(privateConnMeasureName, "current number of connections", stats.UnitDimensionless)
@@ -120,3 +122,22 @@ func WithRouteCount(ctx context.Context, userAgent, sdkWrapper, route, method st
120122

121123
WithCount(ctx, userAgent, sdkWrapper, f, measure)
122124
}
125+
126+
// RecordRequestDuration records a request duration measurement. The context must already contain
127+
// the environment and relay ID tags (from the EnvironmentManager's OpenCensus context). The route
128+
// and method tags are added here.
129+
func RecordRequestDuration(ctx context.Context, userAgent, sdkWrapper, route, method string, duration time.Duration, measure Measure) {
130+
tagCtx, err := tag.New(ctx,
131+
tag.Insert(userAgentTagKey, sanitizeTagValue(userAgent)),
132+
tag.Insert(sdkWrapperTagKey, sanitizeTagValue(sdkWrapper)),
133+
tag.Insert(routeTagKey, sanitizeTagValue(route)),
134+
tag.Insert(methodTagKey, sanitizeTagValue(method)),
135+
)
136+
if err != nil {
137+
return
138+
}
139+
for _, mut := range measure.tags {
140+
tagCtx, _ = tag.New(tagCtx, mut)
141+
}
142+
stats.Record(tagCtx, requestDurationMeasure.M(float64(duration.Microseconds())))
143+
}

internal/metrics/views.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ var (
2222
Aggregation: view.Count(),
2323
TagKeys: append(publicTags, routeTagKey, methodTagKey),
2424
}
25+
requestDurationView *view.View = &view.View{ //nolint:gochecknoglobals
26+
Measure: requestDurationMeasure,
27+
Aggregation: view.Distribution(10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 10000, 25000, 50000, 100000, 250000, 500000, 1000000),
28+
TagKeys: append(publicTags, routeTagKey, methodTagKey),
29+
}
2530
privateConnView *view.View = &view.View{ //nolint:gochecknoglobals
2631
Measure: privateConnMeasure,
2732
Aggregation: view.Sum(),
@@ -43,7 +48,7 @@ var (
4348
)
4449

4550
func getPublicViews() []*view.View {
46-
return []*view.View{publicConnView, publicNewConnView, requestView}
51+
return []*view.View{publicConnView, publicNewConnView, requestView, requestDurationView}
4752
}
4853

4954
func getPrivateViews() []*view.View {

internal/middleware/metrics_middleware.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package middleware
22

33
import (
44
"net/http"
5+
"strings"
6+
"time"
57

68
"github.com/launchdarkly/ld-relay/v8/internal/metrics"
79

@@ -53,7 +55,8 @@ func PollingRequestCount(handler http.Handler) http.Handler {
5355
return withCount(handler, metrics.PollingRequests)
5456
}
5557

56-
// RequestCount is a middleware function that increments the specified metric for each request.
58+
// RequestCount is a middleware function that increments the specified metric for each request
59+
// and records the request duration (excluding streaming responses).
5760
func RequestCount(measure metrics.Measure) mux.MiddlewareFunc {
5861
return func(next http.Handler) http.Handler {
5962
return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
@@ -62,9 +65,14 @@ func RequestCount(measure metrics.Measure) mux.MiddlewareFunc {
6265
sdkWrapper := getSDKWrapper(req)
6366
// Ignoring internal routing error that would have been ignored anyway
6467
route, _ := mux.CurrentRoute(req).GetPathTemplate()
68+
start := time.Now()
6569
metrics.WithRouteCount(ctx.Env.GetMetricsContext(), userAgent, sdkWrapper, route, req.Method, func() {
6670
next.ServeHTTP(w, req)
6771
}, measure)
72+
// Don't record duration for streaming responses — their lifetime is unbounded
73+
if !strings.HasPrefix(strings.ToLower(w.Header().Get("Content-Type")), "text/event-stream") {
74+
metrics.RecordRequestDuration(ctx.Env.GetMetricsContext(), userAgent, sdkWrapper, route, req.Method, time.Since(start), measure)
75+
}
6876
})
6977
}
7078
}

internal/middleware/metrics_middleware_test.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,3 +171,72 @@ func testCountRequests(t *testing.T, measure metrics.Measure, category string) {
171171
})
172172
})
173173
}
174+
175+
func TestRequestDuration(t *testing.T) {
176+
t.Run("records duration for non-streaming request", func(t *testing.T) {
177+
router := mux.NewRouter()
178+
router.Use(RequestCount(metrics.ServerRequests))
179+
router.Handle("/duration-test", nullHandler()).Methods("GET")
180+
181+
metricsMiddlewareTest(t, func(p metricsMiddlewareTestParams) {
182+
expectedTags := map[string]string{
183+
"env": p.envName,
184+
"method": "GET",
185+
"route": "_duration-test",
186+
"platformCategory": "server",
187+
"userAgent": metricsTestUserAgent,
188+
"sdkWrapper": "not-provided",
189+
}
190+
191+
req, _ := http.NewRequest("GET", "/duration-test", nil)
192+
req.Header.Set("User-Agent", metricsTestUserAgent)
193+
req = req.WithContext(WithEnvContextInfo(req.Context(), EnvContextInfo{Env: p.env}))
194+
router.ServeHTTP(httptest.NewRecorder(), req)
195+
196+
p.exporter.AwaitData(t, time.Second, p.mockLog.Loggers, func(d st.TestMetricsData) bool {
197+
return d.HasRowMatching("request_duration", expectedTags, func(row st.TestMetricsRow) bool {
198+
return row.DistributionCount >= 1
199+
})
200+
})
201+
})
202+
})
203+
204+
t.Run("does not record duration for streaming request", func(t *testing.T) {
205+
router := mux.NewRouter()
206+
router.Use(RequestCount(metrics.ServerRequests))
207+
// Handler that sets Content-Type: text/event-stream (like the streaming middleware does)
208+
router.Handle("/stream-route", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
209+
w.Header().Set("Content-Type", "text/event-stream")
210+
})).Methods("GET")
211+
212+
metricsMiddlewareTest(t, func(p metricsMiddlewareTestParams) {
213+
streamTags := map[string]string{
214+
"env": p.envName,
215+
"method": "GET",
216+
"route": "_stream-route",
217+
"platformCategory": "server",
218+
"userAgent": metricsTestUserAgent,
219+
"sdkWrapper": "not-provided",
220+
}
221+
222+
req, _ := http.NewRequest("GET", "/stream-route", nil)
223+
req.Header.Set("User-Agent", metricsTestUserAgent)
224+
req = req.WithContext(WithEnvContextInfo(req.Context(), EnvContextInfo{Env: p.env}))
225+
router.ServeHTTP(httptest.NewRecorder(), req)
226+
227+
// The request count should still be recorded
228+
p.exporter.AwaitData(t, time.Second, p.mockLog.Loggers, func(d st.TestMetricsData) bool {
229+
return d.HasRow("requests", st.TestMetricsRow{
230+
Tags: streamTags,
231+
Count: 1,
232+
})
233+
})
234+
235+
// But there should be no duration data for this specific route
236+
lastData := p.exporter.GetLastData()
237+
require.False(t, lastData.HasRowMatching("request_duration", streamTags, func(st.TestMetricsRow) bool {
238+
return true
239+
}), "streaming request should not have duration recorded")
240+
})
241+
})
242+
}

internal/sharedtest/metrics.go

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,23 @@ func (d TestMetricsData) HasRow(viewName string, expectedRow TestMetricsRow) boo
3636
return false
3737
}
3838

39+
// HasRowMatching returns true if any row for the specified view name has matching tags
40+
// and satisfies the predicate function.
41+
func (d TestMetricsData) HasRowMatching(viewName string, tags map[string]string, predicate func(TestMetricsRow) bool) bool {
42+
for _, r := range d[viewName] {
43+
if reflect.DeepEqual(r.Tags, tags) && predicate(r) {
44+
return true
45+
}
46+
}
47+
return false
48+
}
49+
3950
// TestMetricsRow is a simplified version of an OpenCensus view row, holding only the values
// that tests compare against.
type TestMetricsRow struct {
	// Tags maps each tag name on the row to its value.
	Tags map[string]string

	// Count is the value taken from a row whose data is view.CountData.
	Count int64

	// Sum is a floating-point aggregate for the row; presumably populated for sum-aggregated
	// views (the code that sets it is not shown here — verify against ExportView).
	Sum float64

	// DistributionCount is the Count taken from a row whose data is view.DistributionData.
	DistributionCount int64
}
4557

4658
// NewTestMetricsExporter creates a TestMetricsExporter.
@@ -88,6 +100,9 @@ func (e *TestMetricsExporter) ExportView(viewData *view.Data) {
88100
if countData, ok := vr.Data.(*view.CountData); ok {
89101
tr.Count = countData.Value
90102
}
103+
if distData, ok := vr.Data.(*view.DistributionData); ok {
104+
tr.DistributionCount = distData.Count
105+
}
91106
rows = append(rows, tr)
92107
}
93108

@@ -101,6 +116,17 @@ func (e *TestMetricsExporter) ExportView(viewData *view.Data) {
101116
}
102117
}
103118

119+
// GetLastData returns a snapshot of the most recently received metrics data.
120+
func (e *TestMetricsExporter) GetLastData() TestMetricsData {
121+
e.lock.Lock()
122+
defer e.lock.Unlock()
123+
dataCopy := make(TestMetricsData)
124+
for k, v := range e.lastData {
125+
dataCopy[k] = v
126+
}
127+
return dataCopy
128+
}
129+
104130
// AwaitData waits until matching view data is received.
105131
func (e *TestMetricsExporter) AwaitData(t *testing.T, timeout time.Duration, loggers ldlog.Loggers, fn func(TestMetricsData) bool) {
106132
deadline := time.After(timeout)

internal/streams/stream_provider_ping_jitter_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ import (
1111

1212
"github.com/launchdarkly/eventsource"
1313
"github.com/launchdarkly/go-sdk-common/v3/ldlog"
14-
helpers "github.com/launchdarkly/go-test-helpers/v3"
1514
"github.com/launchdarkly/go-server-sdk-evaluation/v3/ldmodel"
1615
"github.com/launchdarkly/go-server-sdk/v7/subsystems/ldstoreimpl"
16+
helpers "github.com/launchdarkly/go-test-helpers/v3"
1717

1818
"github.com/stretchr/testify/assert"
1919
"github.com/stretchr/testify/require"

0 commit comments

Comments
 (0)