TUN-2699: Metrics for Authenticate RPCs

This commit is contained in:
Adam Chalmers 2020-01-28 12:21:51 -06:00
parent d5139d3882
commit e729dfc51e
3 changed files with 64 additions and 8 deletions

View File

@ -58,9 +58,11 @@ type TunnelMetrics struct {
// oldServerLocations stores the last server the tunnel was connected to
oldServerLocations map[string]string
regSuccess *prometheus.CounterVec
regFail *prometheus.CounterVec
rpcFail *prometheus.CounterVec
regSuccess *prometheus.CounterVec
regFail *prometheus.CounterVec
authSuccess prometheus.Counter
authFail *prometheus.CounterVec
rpcFail *prometheus.CounterVec
muxerMetrics *muxerMetrics
tunnelsHA tunnelsForHA
@ -454,6 +456,27 @@ func NewTunnelMetrics() *TunnelMetrics {
)
prometheus.MustRegister(registerSuccess)
authSuccess := prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: metricsNamespace,
Subsystem: tunnelSubsystem,
Name: "tunnel_authenticate_success",
Help: "Count of successful tunnel authenticate",
},
)
prometheus.MustRegister(authSuccess)
authFail := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricsNamespace,
Subsystem: tunnelSubsystem,
Name: "tunnel_authenticate_fail",
Help: "Count of tunnel authenticate errors by type",
},
[]string{"error"},
)
prometheus.MustRegister(authFail)
return &TunnelMetrics{
haConnections: haConnections,
activeStreams: activeStreams,
@ -474,6 +497,8 @@ func NewTunnelMetrics() *TunnelMetrics {
regFail: registerFail,
rpcFail: rpcFail,
userHostnamesCounts: userHostnamesCounts,
authSuccess: authSuccess,
authFail: authFail,
}
}

View File

@ -354,6 +354,7 @@ func (s *Supervisor) refreshAuth(
logger := s.config.Logger.WithField("subsystem", subsystemRefreshAuth)
authOutcome, err := authenticate(ctx, backoff.Retries())
if err != nil {
s.config.Metrics.authFail.WithLabelValues(err.Error()).Inc()
if duration, ok := backoff.GetBackoffDuration(ctx); ok {
logger.WithError(err).Warnf("Retrying in %v", duration)
return backoff.BackoffTimer(), nil
@ -366,15 +367,20 @@ func (s *Supervisor) refreshAuth(
switch outcome := authOutcome.(type) {
case tunnelpogs.AuthSuccess:
s.SetReconnectToken(outcome.JWT())
s.config.Metrics.authSuccess.Inc()
return timeAfter(outcome.RefreshAfter()), nil
case tunnelpogs.AuthUnknown:
duration := outcome.RefreshAfter()
s.config.Metrics.authFail.WithLabelValues(outcome.Error()).Inc()
logger.WithError(outcome).Warnf("Retrying in %v", duration)
return timeAfter(duration), nil
case tunnelpogs.AuthFail:
s.config.Metrics.authFail.WithLabelValues(outcome.Error()).Inc()
return nil, outcome
default:
return nil, fmt.Errorf("Unexpected outcome type %T", authOutcome)
err := fmt.Errorf("Unexpected outcome type %T", authOutcome)
s.config.Metrics.authFail.WithLabelValues(err.Error()).Inc()
return nil, err
}
}

View File

@ -8,12 +8,37 @@ import (
"time"
"github.com/google/uuid"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
tunnelpogs "github.com/cloudflare/cloudflared/tunnelrpc/pogs"
)
// testConfig returns a TunnelConfig suitable for supervisor tests: it carries
// the supplied logger and a TunnelMetrics whose authSuccess and authFail
// collectors are non-nil. The collectors are only constructed here; this
// helper does not register them with any Prometheus registry.
func testConfig(logger *logrus.Logger) *TunnelConfig {
	successOpts := prometheus.CounterOpts{
		Namespace: metricsNamespace,
		Subsystem: tunnelSubsystem,
		Name:      "tunnel_authenticate_success",
		Help:      "Count of successful tunnel authenticate",
	}
	failOpts := prometheus.CounterOpts{
		Namespace: metricsNamespace,
		Subsystem: tunnelSubsystem,
		Name:      "tunnel_authenticate_fail",
		Help:      "Count of tunnel authenticate errors by type",
	}

	// Only the authenticate counters are populated; every other field of
	// TunnelMetrics is left at its zero value.
	testMetrics := &TunnelMetrics{
		authSuccess: prometheus.NewCounter(successOpts),
		authFail:    prometheus.NewCounterVec(failOpts, []string{"error"}),
	}

	return &TunnelConfig{Logger: logger, Metrics: testMetrics}
}
func TestRefreshAuthBackoff(t *testing.T) {
logger := logrus.New()
logger.Level = logrus.ErrorLevel
@ -24,7 +49,7 @@ func TestRefreshAuthBackoff(t *testing.T) {
return time.After(d)
}
s, err := NewSupervisor(&TunnelConfig{Logger: logger}, uuid.New())
s, err := NewSupervisor(testConfig(logger), uuid.New())
if !assert.NoError(t, err) {
t.FailNow()
}
@ -69,7 +94,7 @@ func TestRefreshAuthSuccess(t *testing.T) {
return time.After(d)
}
s, err := NewSupervisor(&TunnelConfig{Logger: logger}, uuid.New())
s, err := NewSupervisor(testConfig(logger), uuid.New())
if !assert.NoError(t, err) {
t.FailNow()
}
@ -98,7 +123,7 @@ func TestRefreshAuthUnknown(t *testing.T) {
return time.After(d)
}
s, err := NewSupervisor(&TunnelConfig{Logger: logger}, uuid.New())
s, err := NewSupervisor(testConfig(logger), uuid.New())
if !assert.NoError(t, err) {
t.FailNow()
}
@ -121,7 +146,7 @@ func TestRefreshAuthFail(t *testing.T) {
logger := logrus.New()
logger.Level = logrus.ErrorLevel
s, err := NewSupervisor(&TunnelConfig{Logger: logger}, uuid.New())
s, err := NewSupervisor(testConfig(logger), uuid.New())
if !assert.NoError(t, err) {
t.FailNow()
}