TUN-2699: Metrics for Authenticate RPCs

This commit is contained in:
Adam Chalmers 2020-01-28 12:21:51 -06:00
parent d5139d3882
commit e729dfc51e
3 changed files with 64 additions and 8 deletions

View File

@ -60,6 +60,8 @@ type TunnelMetrics struct {
regSuccess *prometheus.CounterVec regSuccess *prometheus.CounterVec
regFail *prometheus.CounterVec regFail *prometheus.CounterVec
authSuccess prometheus.Counter
authFail *prometheus.CounterVec
rpcFail *prometheus.CounterVec rpcFail *prometheus.CounterVec
muxerMetrics *muxerMetrics muxerMetrics *muxerMetrics
@ -454,6 +456,27 @@ func NewTunnelMetrics() *TunnelMetrics {
) )
prometheus.MustRegister(registerSuccess) prometheus.MustRegister(registerSuccess)
authSuccess := prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: metricsNamespace,
Subsystem: tunnelSubsystem,
Name: "tunnel_authenticate_success",
Help: "Count of successful tunnel authenticate",
},
)
prometheus.MustRegister(authSuccess)
authFail := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricsNamespace,
Subsystem: tunnelSubsystem,
Name: "tunnel_authenticate_fail",
Help: "Count of tunnel authenticate errors by type",
},
[]string{"error"},
)
prometheus.MustRegister(authFail)
return &TunnelMetrics{ return &TunnelMetrics{
haConnections: haConnections, haConnections: haConnections,
activeStreams: activeStreams, activeStreams: activeStreams,
@ -474,6 +497,8 @@ func NewTunnelMetrics() *TunnelMetrics {
regFail: registerFail, regFail: registerFail,
rpcFail: rpcFail, rpcFail: rpcFail,
userHostnamesCounts: userHostnamesCounts, userHostnamesCounts: userHostnamesCounts,
authSuccess: authSuccess,
authFail: authFail,
} }
} }

View File

@ -354,6 +354,7 @@ func (s *Supervisor) refreshAuth(
logger := s.config.Logger.WithField("subsystem", subsystemRefreshAuth) logger := s.config.Logger.WithField("subsystem", subsystemRefreshAuth)
authOutcome, err := authenticate(ctx, backoff.Retries()) authOutcome, err := authenticate(ctx, backoff.Retries())
if err != nil { if err != nil {
s.config.Metrics.authFail.WithLabelValues(err.Error()).Inc()
if duration, ok := backoff.GetBackoffDuration(ctx); ok { if duration, ok := backoff.GetBackoffDuration(ctx); ok {
logger.WithError(err).Warnf("Retrying in %v", duration) logger.WithError(err).Warnf("Retrying in %v", duration)
return backoff.BackoffTimer(), nil return backoff.BackoffTimer(), nil
@ -366,15 +367,20 @@ func (s *Supervisor) refreshAuth(
switch outcome := authOutcome.(type) { switch outcome := authOutcome.(type) {
case tunnelpogs.AuthSuccess: case tunnelpogs.AuthSuccess:
s.SetReconnectToken(outcome.JWT()) s.SetReconnectToken(outcome.JWT())
s.config.Metrics.authSuccess.Inc()
return timeAfter(outcome.RefreshAfter()), nil return timeAfter(outcome.RefreshAfter()), nil
case tunnelpogs.AuthUnknown: case tunnelpogs.AuthUnknown:
duration := outcome.RefreshAfter() duration := outcome.RefreshAfter()
s.config.Metrics.authFail.WithLabelValues(outcome.Error()).Inc()
logger.WithError(outcome).Warnf("Retrying in %v", duration) logger.WithError(outcome).Warnf("Retrying in %v", duration)
return timeAfter(duration), nil return timeAfter(duration), nil
case tunnelpogs.AuthFail: case tunnelpogs.AuthFail:
s.config.Metrics.authFail.WithLabelValues(outcome.Error()).Inc()
return nil, outcome return nil, outcome
default: default:
return nil, fmt.Errorf("Unexpected outcome type %T", authOutcome) err := fmt.Errorf("Unexpected outcome type %T", authOutcome)
s.config.Metrics.authFail.WithLabelValues(err.Error()).Inc()
return nil, err
} }
} }

View File

@ -8,12 +8,37 @@ import (
"time" "time"
"github.com/google/uuid" "github.com/google/uuid"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
tunnelpogs "github.com/cloudflare/cloudflared/tunnelrpc/pogs" tunnelpogs "github.com/cloudflare/cloudflared/tunnelrpc/pogs"
) )
// testConfig returns a TunnelConfig for tests that carries the given logger
// and a TunnelMetrics with only the authSuccess and authFail counters
// populated; every other TunnelMetrics field keeps its zero value.
//
// The counters are created but never registered with a Prometheus registry
// here. NOTE(review): presumably this avoids duplicate-registration panics
// when several tests build a config — confirm.
func testConfig(logger *logrus.Logger) *TunnelConfig {
	successOpts := prometheus.CounterOpts{
		Namespace: metricsNamespace,
		Subsystem: tunnelSubsystem,
		Name:      "tunnel_authenticate_success",
		Help:      "Count of successful tunnel authenticate",
	}
	failOpts := prometheus.CounterOpts{
		Namespace: metricsNamespace,
		Subsystem: tunnelSubsystem,
		Name:      "tunnel_authenticate_fail",
		Help:      "Count of tunnel authenticate errors by type",
	}

	// The failure counter is partitioned by a single "error" label.
	testMetrics := &TunnelMetrics{
		authSuccess: prometheus.NewCounter(successOpts),
		authFail:    prometheus.NewCounterVec(failOpts, []string{"error"}),
	}
	return &TunnelConfig{Logger: logger, Metrics: testMetrics}
}
func TestRefreshAuthBackoff(t *testing.T) { func TestRefreshAuthBackoff(t *testing.T) {
logger := logrus.New() logger := logrus.New()
logger.Level = logrus.ErrorLevel logger.Level = logrus.ErrorLevel
@ -24,7 +49,7 @@ func TestRefreshAuthBackoff(t *testing.T) {
return time.After(d) return time.After(d)
} }
s, err := NewSupervisor(&TunnelConfig{Logger: logger}, uuid.New()) s, err := NewSupervisor(testConfig(logger), uuid.New())
if !assert.NoError(t, err) { if !assert.NoError(t, err) {
t.FailNow() t.FailNow()
} }
@ -69,7 +94,7 @@ func TestRefreshAuthSuccess(t *testing.T) {
return time.After(d) return time.After(d)
} }
s, err := NewSupervisor(&TunnelConfig{Logger: logger}, uuid.New()) s, err := NewSupervisor(testConfig(logger), uuid.New())
if !assert.NoError(t, err) { if !assert.NoError(t, err) {
t.FailNow() t.FailNow()
} }
@ -98,7 +123,7 @@ func TestRefreshAuthUnknown(t *testing.T) {
return time.After(d) return time.After(d)
} }
s, err := NewSupervisor(&TunnelConfig{Logger: logger}, uuid.New()) s, err := NewSupervisor(testConfig(logger), uuid.New())
if !assert.NoError(t, err) { if !assert.NoError(t, err) {
t.FailNow() t.FailNow()
} }
@ -121,7 +146,7 @@ func TestRefreshAuthFail(t *testing.T) {
logger := logrus.New() logger := logrus.New()
logger.Level = logrus.ErrorLevel logger.Level = logrus.ErrorLevel
s, err := NewSupervisor(&TunnelConfig{Logger: logger}, uuid.New()) s, err := NewSupervisor(testConfig(logger), uuid.New())
if !assert.NoError(t, err) { if !assert.NoError(t, err) {
t.FailNow() t.FailNow()
} }