simplify implementation

kevinburkesegment · kevinburkesegment · commit da2b1075e5fc · 2025-11-16T13:47:24.000-08:00
diff --git a/datadog/serializer.go b/datadog/serializer.go
@@ -222,24 +222,6 @@ func isTrim(b byte) bool { return b == '.' || b == '_' || b == '-' }
 
 // appendSanitizedMetricName converts *any* string into something that StatsD / Graphite
 // accepts without complaints.
-//
-// OPTIMIZATION NOTES:
-// This implementation was optimized through several iterations:
-//
-//  1. Replaced manual UTF-8 bit manipulation with stdlib utf8.DecodeRune for maintainability
-//     while preserving the same transformation logic (Latin-1 supplement handling, invalid
-//     char replacement).
-//
-//  2. Added fast-path pre-check for the common case: since 99% of metrics are valid ASCII
-//     (e.g., "http.server.request.duration"), we first scan to detect if transformation is
-//     needed. If not, we bulk-copy the string instead of processing byte-by-byte.
-//
-//  3. Explored SIMD (Segment's asm/ascii library) but found simple Go code is faster for
-//     short metric names (~20-40 chars) due to function call overhead. SIMD wins on longer
-//     strings (100+ chars) but those are rare in practice.
-//
-// Result: 2.7x faster on typical workloads (25ns → 9.5ns per metric name). The fast-path
-// check costs ~2-3ns even when transformation is needed, making it always beneficial.
 func appendSanitizedMetricName(dst []byte, raw string) []byte {
 	origLen := len(dst)
 	if raw == "" {
@@ -249,106 +231,37 @@ func appendSanitizedMetricName(dst []byte, raw string) []byte {
 		return dst
 	}
 
-	// Fast path: check if string is pure valid ASCII (common case)
-	// Most metric names like "http.server.request.duration" hit this path
-	needsTransform := false
-	for i := range len(raw) {
-		c := raw[i]
-		if c >= utf8.RuneSelf || !valid[c] {
-			needsTransform = true
+	// Single pass over raw's runes (i is the byte offset into raw, not the output length): copy valid ASCII, map Latin-1 letters, replace the rest.
+	lastWasRepl := false
+	for i, r := range raw {
+		if i >= maxLen {
 			break
 		}
-	}
-
-	// If no transformation needed, just copy and trim
-	if !needsTransform {
-		// Respect maxLen
-		copyLen := min(len(raw), maxLen)
-		dst = append(dst, raw[:copyLen]...)
-
-		// Trim leading/trailing '.', '_' or '-'
-		start, end := origLen, len(dst)
-		for start < end && isTrim(dst[start]) {
-			start++
-		}
-		for end > start && isTrim(dst[end-1]) {
-			end--
-		}
-
-		if start > origLen || end < len(dst) {
-			copy(dst[origLen:], dst[start:end])
-			dst = dst[:origLen+(end-start)]
-		}
-
-		if len(dst) == origLen && len(raw) > 0 {
-			// We didn't append any characters to dst - basically this means we
-			// truncated every single character from raw because it was a dot or
-			// underscore or hyphen.
-			return append(dst, "_truncated_"...)
-		}
-		return dst
-	}
 
-	// Slow path: needs transformation (has unicode, invalid chars, etc)
-	nameLen := 0
-	lastWasRepl := false
-	for _, r := range raw {
-		if r < utf8.RuneSelf {
-			// ASCII byte
-			if valid[byte(r)] {
-				dst = append(dst, byte(r))
-				nameLen++
+		if r < utf8.RuneSelf && valid[byte(r)] {
+			// Valid ASCII character
+			dst = append(dst, byte(r))
+			lastWasRepl = false
+		} else if r >= 0xC0 && r <= 0xFF {
+			// U+00C0–U+00FF, the letter range of the Latin-1 Supplement block (common accented characters like À, É, ñ)
+			mapped := latin1SupplementMap[r]
+			if valid[mapped] {
+				dst = append(dst, mapped)
 				lastWasRepl = false
-			} else {
-				// Invalid ASCII character
-				if !lastWasRepl {
-					dst = append(dst, replacement)
-					nameLen++
-					lastWasRepl = true
-				}
-			}
-		} else {
-			// Non-ASCII rune
-			// Check if rune is in Latin-1 Supplement block (U+00C0 to U+00FF)
-			// This includes common accented characters like À, É, ñ, etc.
-			if r >= 0xC0 && r <= 0xFF {
-				mapped := latin1SupplementMap[r]
-				if valid[mapped] {
-					dst = append(dst, mapped)
-					nameLen++
-					lastWasRepl = false
-					if nameLen >= maxLen {
-						break
-					}
-					continue
-				}
-			}
-
-			if !lastWasRepl {
+			} else if !lastWasRepl {
 				dst = append(dst, replacement)
-				nameLen++
 				lastWasRepl = true
 			}
-		}
-
-		if nameLen >= maxLen {
-			break
+		} else if !lastWasRepl {
+			// Invalid or unsupported character - only append if we didn't just add a replacement
+			dst = append(dst, replacement)
+			lastWasRepl = true
 		}
 	}
 
-	// Trim
-	start, end := origLen, len(dst)
-	for start < end && isTrim(dst[start]) {
-		start++
-	}
-	for end > start && isTrim(dst[end-1]) {
-		end--
-	}
-
-	if start > origLen || end < len(dst) {
-		copy(dst[origLen:], dst[start:end])
-		dst = dst[:origLen+(end-start)]
-	}
+	// Trim leading/trailing '.', '_' or '-'
+	trimmed := bytes.Trim(dst[origLen:], "._-")
+	dst = append(dst[:origLen], trimmed...)
 
 	if len(dst) == origLen {
 		return append(dst, "_truncated_"...)