Skip to content

Commit c29c61d

Browse files
datadog: implement changes from code review
1 parent 791dd09 commit c29c61d

File tree

1 file changed

+30
-35
lines changed

1 file changed

+30
-35
lines changed

datadog/serializer.go

Lines changed: 30 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -241,7 +241,7 @@ func isTrim(b byte) bool { return b == '.' || b == '_' || b == '-' }
241241
// Result: 2.7x faster on typical workloads (25ns → 9.5ns per metric name). The fast-path
242242
// check costs ~2-3ns even when transformation is needed, making it always beneficial.
243243
func appendSanitizedMetricName(dst []byte, raw string) []byte {
244-
orig := len(dst)
244+
origLen := len(dst)
245245
if raw == "" {
246246
if len(dst) == 0 {
247247
return append(dst, "_unnamed_"...)
@@ -252,9 +252,9 @@ func appendSanitizedMetricName(dst []byte, raw string) []byte {
252252
// Fast path: check if string is pure valid ASCII (common case)
253253
// Most metric names like "http.server.request.duration" hit this path
254254
needsTransform := false
255-
for i := 0; i < len(raw); i++ {
255+
for i := range len(raw) {
256256
c := raw[i]
257-
if c >= 128 || !valid[c] {
257+
if c >= utf8.RuneSelf || !valid[c] {
258258
needsTransform = true
259259
break
260260
}
@@ -263,27 +263,24 @@ func appendSanitizedMetricName(dst []byte, raw string) []byte {
263263
// If no transformation needed, just copy and trim
264264
if !needsTransform {
265265
// Respect maxLen
266-
copyLen := len(raw)
267-
if copyLen > maxLen {
268-
copyLen = maxLen
269-
}
266+
copyLen := min(len(raw), maxLen)
270267
dst = append(dst, raw[:copyLen]...)
271268

272269
// Trim leading/trailing '.', '_' or '-'
273-
start, end := orig, len(dst)
270+
start, end := origLen, len(dst)
274271
for start < end && isTrim(dst[start]) {
275272
start++
276273
}
277274
for end > start && isTrim(dst[end-1]) {
278275
end--
279276
}
280277

281-
if start > orig || end < len(dst) {
282-
copy(dst[orig:], dst[start:end])
283-
dst = dst[:orig+(end-start)]
278+
if start > origLen || end < len(dst) {
279+
copy(dst[origLen:], dst[start:end])
280+
dst = dst[:origLen+(end-start)]
284281
}
285282

286-
if len(dst) == orig {
283+
if len(dst) == origLen {
287284
return append(dst, "_truncated_"...)
288285
}
289286
return dst
@@ -292,32 +289,31 @@ func appendSanitizedMetricName(dst []byte, raw string) []byte {
292289
// Slow path: needs transformation (has unicode, invalid chars, etc)
293290
nameLen := 0
294291
lastWasRepl := false
295-
b := []byte(raw)
296-
for i := 0; i < len(b); {
297-
c := b[i]
298-
299-
if c < 128 && valid[c] {
300-
dst = append(dst, c)
301-
nameLen++
302-
lastWasRepl = false
303-
i++
304-
} else if c < 128 {
305-
if !lastWasRepl {
306-
dst = append(dst, replacement)
292+
for _, r := range raw {
293+
if r < utf8.RuneSelf {
294+
// ASCII byte
295+
if valid[byte(r)] {
296+
dst = append(dst, byte(r))
307297
nameLen++
308-
lastWasRepl = true
298+
lastWasRepl = false
299+
} else {
300+
// Invalid ASCII character
301+
if !lastWasRepl {
302+
dst = append(dst, replacement)
303+
nameLen++
304+
lastWasRepl = true
305+
}
309306
}
310-
i++
311307
} else {
312-
r, size := utf8.DecodeRune(b[i:])
313-
308+
// Non-ASCII rune
309+
// Check if rune is in the U+00C0 to U+00FF sub-range of the Latin-1 Supplement block (the block itself spans U+0080 to U+00FF)
310+
// This includes common accented characters like À, É, ñ, etc.
314311
if r >= 0xC0 && r <= 0xFF {
315312
mapped := latin1SupplementMap[r]
316313
if valid[mapped] {
317314
dst = append(dst, mapped)
318315
nameLen++
319316
lastWasRepl = false
320-
i += size
321317
if nameLen >= maxLen {
322318
break
323319
}
@@ -330,7 +326,6 @@ func appendSanitizedMetricName(dst []byte, raw string) []byte {
330326
nameLen++
331327
lastWasRepl = true
332328
}
333-
i += size
334329
}
335330

336331
if nameLen >= maxLen {
@@ -339,20 +334,20 @@ func appendSanitizedMetricName(dst []byte, raw string) []byte {
339334
}
340335

341336
// Trim
342-
start, end := orig, len(dst)
337+
start, end := origLen, len(dst)
343338
for start < end && isTrim(dst[start]) {
344339
start++
345340
}
346341
for end > start && isTrim(dst[end-1]) {
347342
end--
348343
}
349344

350-
if start > orig || end < len(dst) {
351-
copy(dst[orig:], dst[start:end])
352-
dst = dst[:orig+(end-start)]
345+
if start > origLen || end < len(dst) {
346+
copy(dst[origLen:], dst[start:end])
347+
dst = dst[:origLen+(end-start)]
353348
}
354349

355-
if len(dst) == orig {
350+
if len(dst) == origLen {
356351
return append(dst, "_truncated_"...)
357352
}
358353
return dst

0 commit comments

Comments
 (0)