TUN-2699: Metrics for Authenticate RPCs
This commit is contained in:
parent
d5139d3882
commit
e729dfc51e
|
@ -58,9 +58,11 @@ type TunnelMetrics struct {
|
||||||
// oldServerLocations stores the last server the tunnel was connected to
|
// oldServerLocations stores the last server the tunnel was connected to
|
||||||
oldServerLocations map[string]string
|
oldServerLocations map[string]string
|
||||||
|
|
||||||
regSuccess *prometheus.CounterVec
|
regSuccess *prometheus.CounterVec
|
||||||
regFail *prometheus.CounterVec
|
regFail *prometheus.CounterVec
|
||||||
rpcFail *prometheus.CounterVec
|
authSuccess prometheus.Counter
|
||||||
|
authFail *prometheus.CounterVec
|
||||||
|
rpcFail *prometheus.CounterVec
|
||||||
|
|
||||||
muxerMetrics *muxerMetrics
|
muxerMetrics *muxerMetrics
|
||||||
tunnelsHA tunnelsForHA
|
tunnelsHA tunnelsForHA
|
||||||
|
@ -454,6 +456,27 @@ func NewTunnelMetrics() *TunnelMetrics {
|
||||||
)
|
)
|
||||||
prometheus.MustRegister(registerSuccess)
|
prometheus.MustRegister(registerSuccess)
|
||||||
|
|
||||||
|
authSuccess := prometheus.NewCounter(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Namespace: metricsNamespace,
|
||||||
|
Subsystem: tunnelSubsystem,
|
||||||
|
Name: "tunnel_authenticate_success",
|
||||||
|
Help: "Count of successful tunnel authenticate",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
prometheus.MustRegister(authSuccess)
|
||||||
|
|
||||||
|
authFail := prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Namespace: metricsNamespace,
|
||||||
|
Subsystem: tunnelSubsystem,
|
||||||
|
Name: "tunnel_authenticate_fail",
|
||||||
|
Help: "Count of tunnel authenticate errors by type",
|
||||||
|
},
|
||||||
|
[]string{"error"},
|
||||||
|
)
|
||||||
|
prometheus.MustRegister(authFail)
|
||||||
|
|
||||||
return &TunnelMetrics{
|
return &TunnelMetrics{
|
||||||
haConnections: haConnections,
|
haConnections: haConnections,
|
||||||
activeStreams: activeStreams,
|
activeStreams: activeStreams,
|
||||||
|
@ -474,6 +497,8 @@ func NewTunnelMetrics() *TunnelMetrics {
|
||||||
regFail: registerFail,
|
regFail: registerFail,
|
||||||
rpcFail: rpcFail,
|
rpcFail: rpcFail,
|
||||||
userHostnamesCounts: userHostnamesCounts,
|
userHostnamesCounts: userHostnamesCounts,
|
||||||
|
authSuccess: authSuccess,
|
||||||
|
authFail: authFail,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -354,6 +354,7 @@ func (s *Supervisor) refreshAuth(
|
||||||
logger := s.config.Logger.WithField("subsystem", subsystemRefreshAuth)
|
logger := s.config.Logger.WithField("subsystem", subsystemRefreshAuth)
|
||||||
authOutcome, err := authenticate(ctx, backoff.Retries())
|
authOutcome, err := authenticate(ctx, backoff.Retries())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
s.config.Metrics.authFail.WithLabelValues(err.Error()).Inc()
|
||||||
if duration, ok := backoff.GetBackoffDuration(ctx); ok {
|
if duration, ok := backoff.GetBackoffDuration(ctx); ok {
|
||||||
logger.WithError(err).Warnf("Retrying in %v", duration)
|
logger.WithError(err).Warnf("Retrying in %v", duration)
|
||||||
return backoff.BackoffTimer(), nil
|
return backoff.BackoffTimer(), nil
|
||||||
|
@ -366,15 +367,20 @@ func (s *Supervisor) refreshAuth(
|
||||||
switch outcome := authOutcome.(type) {
|
switch outcome := authOutcome.(type) {
|
||||||
case tunnelpogs.AuthSuccess:
|
case tunnelpogs.AuthSuccess:
|
||||||
s.SetReconnectToken(outcome.JWT())
|
s.SetReconnectToken(outcome.JWT())
|
||||||
|
s.config.Metrics.authSuccess.Inc()
|
||||||
return timeAfter(outcome.RefreshAfter()), nil
|
return timeAfter(outcome.RefreshAfter()), nil
|
||||||
case tunnelpogs.AuthUnknown:
|
case tunnelpogs.AuthUnknown:
|
||||||
duration := outcome.RefreshAfter()
|
duration := outcome.RefreshAfter()
|
||||||
|
s.config.Metrics.authFail.WithLabelValues(outcome.Error()).Inc()
|
||||||
logger.WithError(outcome).Warnf("Retrying in %v", duration)
|
logger.WithError(outcome).Warnf("Retrying in %v", duration)
|
||||||
return timeAfter(duration), nil
|
return timeAfter(duration), nil
|
||||||
case tunnelpogs.AuthFail:
|
case tunnelpogs.AuthFail:
|
||||||
|
s.config.Metrics.authFail.WithLabelValues(outcome.Error()).Inc()
|
||||||
return nil, outcome
|
return nil, outcome
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("Unexpected outcome type %T", authOutcome)
|
err := fmt.Errorf("Unexpected outcome type %T", authOutcome)
|
||||||
|
s.config.Metrics.authFail.WithLabelValues(err.Error()).Inc()
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,12 +8,37 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
|
|
||||||
tunnelpogs "github.com/cloudflare/cloudflared/tunnelrpc/pogs"
|
tunnelpogs "github.com/cloudflare/cloudflared/tunnelrpc/pogs"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// testConfig returns a TunnelConfig for the refreshAuth tests, carrying the
// given logger and a TunnelMetrics whose authSuccess and authFail counters are
// populated. refreshAuth increments these counters, so leaving them unset (as
// a bare &TunnelConfig{Logger: logger} would) would presumably panic.
// The counters are created here but not registered with Prometheus.
func testConfig(logger *logrus.Logger) *TunnelConfig {
|
||||||
|
metrics := TunnelMetrics{}
|
||||||
|

||||||
|
metrics.authSuccess = prometheus.NewCounter(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Namespace: metricsNamespace,
|
||||||
|
Subsystem: tunnelSubsystem,
|
||||||
|
Name: "tunnel_authenticate_success",
|
||||||
|
Help: "Count of successful tunnel authenticate",
|
||||||
|
},
|
||||||
|
)
|
||||||
|

||||||
|
metrics.authFail = prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Namespace: metricsNamespace,
|
||||||
|
Subsystem: tunnelSubsystem,
|
||||||
|
Name: "tunnel_authenticate_fail",
|
||||||
|
Help: "Count of tunnel authenticate errors by type",
|
||||||
|
},
|
||||||
|
[]string{"error"},
|
||||||
|
)
|
||||||
|
return &TunnelConfig{Logger: logger, Metrics: &metrics}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRefreshAuthBackoff(t *testing.T) {
|
func TestRefreshAuthBackoff(t *testing.T) {
|
||||||
logger := logrus.New()
|
logger := logrus.New()
|
||||||
logger.Level = logrus.ErrorLevel
|
logger.Level = logrus.ErrorLevel
|
||||||
|
@ -24,7 +49,7 @@ func TestRefreshAuthBackoff(t *testing.T) {
|
||||||
return time.After(d)
|
return time.After(d)
|
||||||
}
|
}
|
||||||
|
|
||||||
s, err := NewSupervisor(&TunnelConfig{Logger: logger}, uuid.New())
|
s, err := NewSupervisor(testConfig(logger), uuid.New())
|
||||||
if !assert.NoError(t, err) {
|
if !assert.NoError(t, err) {
|
||||||
t.FailNow()
|
t.FailNow()
|
||||||
}
|
}
|
||||||
|
@ -69,7 +94,7 @@ func TestRefreshAuthSuccess(t *testing.T) {
|
||||||
return time.After(d)
|
return time.After(d)
|
||||||
}
|
}
|
||||||
|
|
||||||
s, err := NewSupervisor(&TunnelConfig{Logger: logger}, uuid.New())
|
s, err := NewSupervisor(testConfig(logger), uuid.New())
|
||||||
if !assert.NoError(t, err) {
|
if !assert.NoError(t, err) {
|
||||||
t.FailNow()
|
t.FailNow()
|
||||||
}
|
}
|
||||||
|
@ -98,7 +123,7 @@ func TestRefreshAuthUnknown(t *testing.T) {
|
||||||
return time.After(d)
|
return time.After(d)
|
||||||
}
|
}
|
||||||
|
|
||||||
s, err := NewSupervisor(&TunnelConfig{Logger: logger}, uuid.New())
|
s, err := NewSupervisor(testConfig(logger), uuid.New())
|
||||||
if !assert.NoError(t, err) {
|
if !assert.NoError(t, err) {
|
||||||
t.FailNow()
|
t.FailNow()
|
||||||
}
|
}
|
||||||
|
@ -121,7 +146,7 @@ func TestRefreshAuthFail(t *testing.T) {
|
||||||
logger := logrus.New()
|
logger := logrus.New()
|
||||||
logger.Level = logrus.ErrorLevel
|
logger.Level = logrus.ErrorLevel
|
||||||
|
|
||||||
s, err := NewSupervisor(&TunnelConfig{Logger: logger}, uuid.New())
|
s, err := NewSupervisor(testConfig(logger), uuid.New())
|
||||||
if !assert.NoError(t, err) {
|
if !assert.NoError(t, err) {
|
||||||
t.FailNow()
|
t.FailNow()
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue