diff --git a/metrics/prometheus_defs.go b/metrics/prometheus_defs.go index b14ca92..289f71b 100644 --- a/metrics/prometheus_defs.go +++ b/metrics/prometheus_defs.go @@ -58,15 +58,16 @@ var ( muxSessionLabels...) // Mux Manager - muxManagerLabels = []string{"addr", "mode", "config_name"} MuxErrors = DefaultCounterVec("mux_errors", "Number of errors observed from mux", append(muxManagerLabels, "error")...) MuxConnectionEstablish = DefaultCounterVec("mux_connection_establish", "Number of times mux has established", muxManagerLabels...) - MuxDialFailed = DefaultCounterVec("mux_dial_failed", "Mux failed when dialing", muxManagerLabels...) - MuxDialSuccess = DefaultCounterVec("mux_dial_success", "Mux succeeded on dial", muxManagerLabels...) MuxServerDisconnected = DefaultCounterVec("mux_server_disconnected", "Mux server disconnected", muxManagerLabels...) NumMuxesActive = DefaultGaugeVec("num_muxes_active", "Host-local number of active muxes for config", muxManagerLabels...) + // Connection provider + ReceiverError = DefaultCounterVec("receiver_error", "Number of errors observed from connection receiver", append(muxManagerLabels, "error")...) + EstablisherError = DefaultCounterVec("establisher_error", "Number of errors observed from connection establisher", muxManagerLabels...) + // Translation interceptor translationLabels = []string{"kind", "message_type"} @@ -128,9 +129,9 @@ func init() { // Mux Manager prometheus.MustRegister(MuxErrors) + prometheus.MustRegister(ReceiverError) prometheus.MustRegister(MuxConnectionEstablish) - prometheus.MustRegister(MuxDialFailed) - prometheus.MustRegister(MuxDialSuccess) + prometheus.MustRegister(EstablisherError) prometheus.MustRegister(MuxServerDisconnected) prometheus.MustRegister(NumMuxesActive) diff --git a/transport/mux/establisher.go b/transport/mux/establisher.go index b6cee67..a3e78de 100644 --- a/transport/mux/establisher.go +++ b/transport/mux/establisher.go @@ -73,8 +73,7 @@ func NewMuxEstablisherProvider(lifetime context.Context, name string, transportF return yamux.Client(conn, cfg) } // pre-initialize the MuxDial metrics - metrics.MuxDialFailed.WithLabelValues(metricLabels...) - metrics.MuxDialSuccess.WithLabelValues(metricLabels...) + metrics.EstablisherError.WithLabelValues(metricLabels...) return NewMuxProvider(lifetime, name, connPv, sessionFn, connectionsCapacity, transportFn, metricLabels, logger), nil } @@ -100,16 +99,17 @@ func (p *establishingConnProvider) NewConnection() (net.Conn, error) { p.logger.Info("mux client failed to dial", tag.Error(err)) return true } + if err := backoff.ThrottleRetry(dialFn, retryPolicy, retryable); err != nil { if p.lifetime.Err() != nil { // shutting down, just exit return nil, p.lifetime.Err() } p.logger.Error("mux client failed to dial with retry", tag.Error(err)) - metrics.MuxDialFailed.WithLabelValues(p.metricLabels...).Inc() + metrics.EstablisherError.WithLabelValues(p.metricLabels...).Inc() return nil, err } - metrics.MuxDialSuccess.WithLabelValues(p.metricLabels...).Inc() + return client, nil } diff --git a/transport/mux/receiver.go b/transport/mux/receiver.go index 9e1ae6d..c8d4549 100644 --- a/transport/mux/receiver.go +++ b/transport/mux/receiver.go @@ -86,7 +86,7 @@ func (r *receivingConnProvider) NewConnection() (net.Conn, error) { } if err != nil { r.logger.Fatal("listener.Accept failed", tag.Error(err)) - metrics.MuxErrors.WithLabelValues(append(r.metricLabels, classifyError(err))...).Inc() + metrics.ReceiverError.WithLabelValues(append(r.metricLabels, classifyError(err))...).Inc() return nil, err } r.logger.Info("Accept new connection", tag.NewStringTag("remoteAddr", conn.RemoteAddr().String())) @@ -98,7 +98,7 @@ func classifyError(err error) string { if err == io.EOF { return "eof" } else { - return "unclassified error" + return "unknown" } }