diff --git a/go.mod b/go.mod index ba259b9cb..863b34085 100644 --- a/go.mod +++ b/go.mod @@ -18,6 +18,8 @@ require ( github.com/golang-jwt/jwt/v5 v5.2.3 github.com/google/go-cmp v0.7.0 github.com/google/uuid v1.6.0 + github.com/grafana/otel-profiling-go v0.5.1 + github.com/grafana/pyroscope-go v1.2.8 github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 github.com/hashicorp/go-hclog v1.6.3 github.com/hashicorp/go-plugin v1.7.0 @@ -103,6 +105,7 @@ require ( github.com/gofrs/uuid v4.4.0+incompatible // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/flatbuffers v25.2.10+incompatible // indirect + github.com/grafana/pyroscope-go/godeltaprof v0.1.9 // indirect github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.2 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect github.com/hako/durafmt v0.0.0-20200710122514-c0fb7b4da026 // indirect diff --git a/go.sum b/go.sum index 146e73df8..38a8d0b61 100644 --- a/go.sum +++ b/go.sum @@ -73,6 +73,7 @@ github.com/go-json-experiment/json v0.0.0-20250223041408-d3c622f1b874/go.mod h1: github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= @@ -126,6 +127,7 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -133,6 +135,12 @@ github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm4 github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grafana/otel-profiling-go v0.5.1 h1:stVPKAFZSa7eGiqbYuG25VcqYksR6iWvF3YH66t4qL8= +github.com/grafana/otel-profiling-go v0.5.1/go.mod h1:ftN/t5A/4gQI19/8MoWurBEtC6gFw8Dns1sJZ9W4Tls= +github.com/grafana/pyroscope-go v1.2.8 h1:UvCwIhlx9DeV7F6TW/z8q1Mi4PIm3vuUJ2ZlCEvmA4M= +github.com/grafana/pyroscope-go v1.2.8/go.mod h1:SSi59eQ1/zmKoY/BKwa5rSFsJaq+242Bcrr4wPix1g8= +github.com/grafana/pyroscope-go/godeltaprof v0.1.9 h1:c1Us8i6eSmkW+Ez05d3co8kasnuOY813tbMN8i/a3Og= +github.com/grafana/pyroscope-go/godeltaprof v0.1.9/go.mod h1:2+l7K7twW49Ct4wFluZD3tZ6e0SjanjcUUBPVD/UuGU= github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 h1:qnpSQwGEnkcRpTqNOIR6bJbR0gAorgP9CSALpRcKoAA= github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1/go.mod h1:lXGCsh6c22WGtjr+qGHj1otzZpV/1kwTMAqkwZsnWRU= github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.2 h1:sGm2vDRFUrQJO/Veii4h4zG2vvqG6uWNkBHSTqXOZk0= @@ -365,6 +373,7 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= @@ -386,6 +395,7 @@ go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 h1:YH4g8lQroajqUwWbq/tr2QX1JFmEXaDLgG+ew9bLMWo= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0/go.mod h1:fvPi2qXDqFs8M4B4fmJhE92TyQs9Ydjlg3RvfUp+NbQ= +go.opentelemetry.io/otel v1.21.0/go.mod h1:QZzNPQPm1zLX4gZK4cMi+71eaorMSGT3A4znnUvNNEo= go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48= go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8= go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.12.2 h1:06ZeJRe5BnYXceSM9Vya83XXVaNGe3H1QqsvqRANQq8= @@ -410,8 +420,10 @@ go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.36.0 h1:G8Xec/SgZQricwW go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.36.0/go.mod h1:PD57idA/AiFD5aqoxGxCvT/ILJPeHy3MjqU/NS7KogY= go.opentelemetry.io/otel/log v0.15.0 h1:0VqVnc3MgyYd7QqNVIldC3dsLFKgazR6P3P3+ypkyDY= go.opentelemetry.io/otel/log v0.15.0/go.mod h1:9c/G1zbyZfgu1HmQD7Qj84QMmwTp2QCQsZH1aeoWDE4= +go.opentelemetry.io/otel/metric v1.21.0/go.mod h1:o1p3CA8nNHW8j5yuQLdc1eeqEaPfzug24uvsyIEJRWM= go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0= go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs= +go.opentelemetry.io/otel/sdk v1.21.0/go.mod h1:Nna6Yv7PWTdgJHVRD9hIYywQBRx7pbox6nwBnZIxl/E= go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18= go.opentelemetry.io/otel/sdk v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE= go.opentelemetry.io/otel/sdk/log v0.15.0 h1:WgMEHOUt5gjJE93yqfqJOkRflApNif84kxoHWS9VVHE= @@ -420,6 +432,7 @@ go.opentelemetry.io/otel/sdk/log/logtest v0.13.0 h1:9yio6AFZ3QD9j9oqshV1Ibm9gPLl go.opentelemetry.io/otel/sdk/log/logtest v0.13.0/go.mod h1:QOGiAJHl+fob8Nu85ifXfuQYmJTFAvcrxL6w5/tu168= go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8= go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew= +go.opentelemetry.io/otel/trace v1.21.0/go.mod h1:LGbsEB0f9LGjN+OZaQQ26sohbOmiMR+BaslueVtS/qQ= go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI= go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA= go.opentelemetry.io/proto/otlp v1.6.0 h1:jQjP+AQyTf+Fe7OKj/MfkDrmK4MNVtw2NpXsf9fefDI= @@ -528,6 +541,7 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= diff --git a/pkg/loop/config.go b/pkg/loop/config.go index 19e1ab7f1..0335b8ce7 100644 --- a/pkg/loop/config.go +++ b/pkg/loop/config.go @@ -46,6 +46,13 @@ const ( envPromPort = "CL_PROMETHEUS_PORT" + envPyroscopeAuthToken = "CL_PYROSCOPE_AUTH_TOKEN" + envPyroscopeServerAddress = "CL_PYROSCOPE_SERVER_ADDRESS" + envPyroscopeEnvironment = "CL_PYROSCOPE_ENVIRONMENT" + envPyroscopeLinkTracesToProfiles = "CL_PYROSCOPE_LINK_TRACES_TO_PROFILES" + envPyroscopePPROFBlockProfileRate = "CL_PYROSCOPE_PPROF_BLOCK_PROFILE_RATE" + envPyroscopePPROFMutexProfileFraction = "CL_PYROSCOPE_PPROF_MUTEX_PROFILE_FRACTION" + envTracingEnabled = "CL_TRACING_ENABLED" envTracingCollectorTarget = "CL_TRACING_COLLECTOR_TARGET" envTracingSamplingRatio = "CL_TRACING_SAMPLING_RATIO" @@ -89,6 +96,12 @@ const ( type EnvConfig struct { AppID string + ChipIngressEndpoint string + ChipIngressInsecureConnection bool + + CRESettings string + CRESettingsDefault string + DatabaseURL *config.SecretURL DatabaseIdleInTxSessionTimeout time.Duration DatabaseLockTimeout time.Duration @@ -115,13 +128,14 @@ type EnvConfig struct { MercuryTransmitterReaperMaxAge time.Duration MercuryVerboseLogging bool - PrometheusPort int //TODO more than just prom + PrometheusPort int // also serves pprof routes - TracingEnabled bool - TracingCollectorTarget string - TracingSamplingRatio float64 - TracingTLSCertPath string - TracingAttributes map[string]string + PyroscopeAuthToken string + PyroscopeServerAddress string + PyroscopeEnvironment string + PyroscopeLinkTracesToProfiles bool + PyroscopePPROFBlockProfileRate int + PyroscopePPROFMutexProfileFraction int TelemetryEnabled bool TelemetryEndpoint string @@ -148,11 +162,11 @@ type EnvConfig struct { TelemetryMetricCompressor string TelemetryLogCompressor string - ChipIngressEndpoint string - ChipIngressInsecureConnection bool - - CRESettings string - CRESettingsDefault string + TracingEnabled bool + TracingCollectorTarget string + TracingSamplingRatio float64 + TracingTLSCertPath string + TracingAttributes map[string]string } // AsCmdEnv returns a slice of environment variable key/value pairs for an exec.Cmd. @@ -193,6 +207,13 @@ func (e *EnvConfig) AsCmdEnv() (env []string) { add(envPromPort, strconv.Itoa(e.PrometheusPort)) + add(envPyroscopeAuthToken, e.PyroscopeAuthToken) + add(envPyroscopeServerAddress, e.PyroscopeServerAddress) + add(envPyroscopeEnvironment, e.PyroscopeEnvironment) + add(envPyroscopeLinkTracesToProfiles, strconv.FormatBool(e.PyroscopeLinkTracesToProfiles)) + add(envPyroscopePPROFBlockProfileRate, strconv.Itoa(e.PyroscopePPROFBlockProfileRate)) + add(envPyroscopePPROFMutexProfileFraction, strconv.Itoa(e.PyroscopePPROFMutexProfileFraction)) + add(envTracingEnabled, strconv.FormatBool(e.TracingEnabled)) add(envTracingCollectorTarget, e.TracingCollectorTarget) add(envTracingSamplingRatio, strconv.FormatFloat(e.TracingSamplingRatio, 'f', -1, 64)) @@ -352,6 +373,22 @@ func (e *EnvConfig) parse() error { return fmt.Errorf("failed to parse %s = %q: %w", envPromPort, promPortStr, err) } + e.PyroscopeAuthToken = os.Getenv(envPyroscopeAuthToken) + e.PyroscopeServerAddress = os.Getenv(envPyroscopeServerAddress) + e.PyroscopeEnvironment = os.Getenv(envPyroscopeEnvironment) + e.PyroscopeLinkTracesToProfiles, err = getBool(envPyroscopeLinkTracesToProfiles) + if err != nil { + return fmt.Errorf("failed to parse %s: %w", envPyroscopeLinkTracesToProfiles, err) + } + e.PyroscopePPROFBlockProfileRate, err = getInt(envPyroscopePPROFBlockProfileRate) + if err != nil { + return fmt.Errorf("failed to parse %s: %w", envPyroscopePPROFBlockProfileRate, err) + } + e.PyroscopePPROFMutexProfileFraction, err = getInt(envPyroscopePPROFMutexProfileFraction) + if err != nil { + return fmt.Errorf("failed to parse %s: %w", envPyroscopePPROFMutexProfileFraction, err) + } + e.TracingEnabled, err = getBool(envTracingEnabled) if err != nil { return fmt.Errorf("failed to parse %s: %w", envTracingEnabled, err) diff --git a/pkg/loop/config_test.go b/pkg/loop/config_test.go index 9a0b6daf2..34cd59a04 100644 --- a/pkg/loop/config_test.go +++ b/pkg/loop/config_test.go @@ -55,6 +55,13 @@ func TestEnvConfig_parse(t *testing.T) { envPromPort: "8080", + envPyroscopeAuthToken: "token", + envPyroscopeServerAddress: "http://pyroscope:4040", + envPyroscopeEnvironment: "pyroscope-env", + envPyroscopeLinkTracesToProfiles: "true", + envPyroscopePPROFBlockProfileRate: "42", + envPyroscopePPROFMutexProfileFraction: "99", + envTracingEnabled: "true", envTracingCollectorTarget: "some:target", envTracingSamplingRatio: "1.0", @@ -160,6 +167,13 @@ var envCfgFull = EnvConfig{ PrometheusPort: 8080, + PyroscopeAuthToken: "token", + PyroscopeServerAddress: "http://pyroscope:4040", + PyroscopeEnvironment: "pyroscope-env", + PyroscopeLinkTracesToProfiles: true, + PyroscopePPROFBlockProfileRate: 42, + PyroscopePPROFMutexProfileFraction: 99, + TracingEnabled: true, TracingAttributes: map[string]string{"XYZ": "value"}, TracingCollectorTarget: "some:target", @@ -213,6 +227,13 @@ func TestEnvConfig_AsCmdEnv(t *testing.T) { assert.Equal(t, strconv.Itoa(8080), got[envPromPort]) + assert.Equal(t, "token", got[envPyroscopeAuthToken]) + assert.Equal(t, "http://pyroscope:4040", got[envPyroscopeServerAddress]) + assert.Equal(t, "pyroscope-env", got[envPyroscopeEnvironment]) + assert.Equal(t, "true", got[envPyroscopeLinkTracesToProfiles]) + assert.Equal(t, "42", got[envPyroscopePPROFBlockProfileRate]) + assert.Equal(t, "99", got[envPyroscopePPROFMutexProfileFraction]) + assert.Equal(t, "true", got[envTracingEnabled]) assert.Equal(t, "some:target", got[envTracingCollectorTarget]) assert.Equal(t, "1", got[envTracingSamplingRatio]) diff --git a/pkg/loop/server.go b/pkg/loop/server.go index ae61c9d6c..a7d92ee70 100644 --- a/pkg/loop/server.go +++ b/pkg/loop/server.go @@ -5,9 +5,15 @@ import ( "fmt" "os" "os/signal" + "path/filepath" + "runtime" + "runtime/debug" "time" + otelpyroscope "github.com/grafana/otel-profiling-go" + "github.com/grafana/pyroscope-go" "github.com/jmoiron/sqlx" + "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" sdkmetric "go.opentelemetry.io/otel/sdk/metric" semconv "go.opentelemetry.io/otel/semconv/v1.17.0" @@ -94,6 +100,7 @@ type Server struct { webServer *webServer checker *services.HealthChecker LimitsFactory limits.Factory + profiler *pyroscope.Profiler } func newServer(loggerName string) (*Server, error) { @@ -221,6 +228,67 @@ func (s *Server) start(opts ...ServerOpt) error { } } + if addr := s.EnvConfig.PyroscopeServerAddress; addr != "" { + runtime.SetBlockProfileRate(s.EnvConfig.PyroscopePPROFBlockProfileRate) + runtime.SetMutexProfileFraction(s.EnvConfig.PyroscopePPROFMutexProfileFraction) + + hostname, _ := os.Hostname() + var ver, sha, goVer, module string + if bi, ok := debug.ReadBuildInfo(); ok { + ver = bi.Main.Version + sha = bi.Main.Sum + if len(sha) > 7 { + sha = sha[:7] + } + goVer = bi.GoVersion + module = bi.Main.Path + } + + appName, err := os.Executable() + if err != nil { + s.Logger.Warnf("Failed to get executable name: %v", err) + appName = "unknown" + } else { + appName = filepath.Base(appName) + } + + s.profiler, err = pyroscope.Start(pyroscope.Config{ + ApplicationName: appName, + ServerAddress: s.EnvConfig.PyroscopeServerAddress, + AuthToken: s.EnvConfig.PyroscopeAuthToken, + + Tags: map[string]string{ + "module": module, + "SHA": sha, + "Version": ver, + "go": goVer, + "Environment": s.EnvConfig.PyroscopeEnvironment, + "hostname": hostname, + }, + ProfileTypes: []pyroscope.ProfileType{ + // these profile types are enabled by default: + pyroscope.ProfileCPU, + pyroscope.ProfileAllocObjects, + pyroscope.ProfileAllocSpace, + pyroscope.ProfileInuseObjects, + pyroscope.ProfileInuseSpace, + + // these profile types are optional: + pyroscope.ProfileGoroutines, + pyroscope.ProfileMutexCount, + pyroscope.ProfileMutexDuration, + pyroscope.ProfileBlockCount, + pyroscope.ProfileBlockDuration, + }, + }) + if err != nil { + return fmt.Errorf("failed to start pyroscope profiler: %w", err) + } + if tracingConfig.Enabled && s.EnvConfig.PyroscopeLinkTracesToProfiles { + otel.SetTracerProvider(otelpyroscope.NewTracerProvider(otel.GetTracerProvider())) + } + } + s.webServer = WebServerOpts{}.New(s.Logger, s.EnvConfig.PrometheusPort) if err := s.webServer.Start(ctx); err != nil { return fmt.Errorf("error starting prometheus server: %w", err) @@ -291,6 +359,9 @@ func (s *Server) Stop() { } s.Logger.ErrorIfFn(s.checker.Close, "Failed to close health checker") s.Logger.ErrorIfFn(s.webServer.Close, "Failed to close web server") + if s.profiler != nil { + s.Logger.ErrorIfFn(s.profiler.Stop, "Failed to stop pyroscope profiler") + } if err := s.Logger.Sync(); err != nil { fmt.Println("Failed to sync logger:", err) }