From e729dfc51e199278008fc2a60c916666c6327ce7 Mon Sep 17 00:00:00 2001
From: Adam Chalmers
Date: Tue, 28 Jan 2020 12:21:51 -0600
Subject: [PATCH] TUN-2699: Metrics for Authenticate RPCs

---
Review notes (this area is not part of the commit message):

- What the patch does: adds an authSuccess counter and an authFail counter
  vector (label "error") to TunnelMetrics, creates and registers both in
  NewTunnelMetrics, and increments them on every outcome branch of
  Supervisor.refreshAuth.
- authFail is labelled with the raw text of err.Error() / outcome.Error().
  NOTE(review): each distinct message becomes its own label value; confirm
  these messages come from a small fixed set, since the help text describes
  the label as an error "type".
- testConfig in supervisor_test.go repeats the CounterOpts of both counters
  from NewTunnelMetrics and does not register them. NOTE(review): the two
  copies have to be kept in sync by hand.
- The test hunks only swap &TunnelConfig{Logger: logger} for
  testConfig(logger); this patch adds no assertions on the new counters.
  Presumably the swap is needed because refreshAuth now dereferences
  s.config.Metrics - TODO confirm.
- Line breaks and leading tabs below were restored from the hunk headers and
  gofmt conventions; spacing inside each line is kept as found. Regenerate
  with git format-patch before applying.

 origin/metrics.go | 31 ++++++++++++++++++++++++++++---
 origin/supervisor.go | 8 +++++++-
 origin/supervisor_test.go | 33 +++++++++++++++++++++++++++++----
 3 files changed, 64 insertions(+), 8 deletions(-)

diff --git a/origin/metrics.go b/origin/metrics.go
index 928cafb6..20ab2394 100644
--- a/origin/metrics.go
+++ b/origin/metrics.go
@@ -58,9 +58,11 @@ type TunnelMetrics struct {
 	// oldServerLocations stores the last server the tunnel was connected to
 	oldServerLocations map[string]string
 
-	regSuccess *prometheus.CounterVec
-	regFail *prometheus.CounterVec
-	rpcFail *prometheus.CounterVec
+	regSuccess *prometheus.CounterVec
+	regFail *prometheus.CounterVec
+	authSuccess prometheus.Counter
+	authFail *prometheus.CounterVec
+	rpcFail *prometheus.CounterVec
 
 	muxerMetrics *muxerMetrics
 	tunnelsHA tunnelsForHA
@@ -454,6 +456,27 @@ func NewTunnelMetrics() *TunnelMetrics {
 	)
 	prometheus.MustRegister(registerSuccess)
 
+	authSuccess := prometheus.NewCounter(
+		prometheus.CounterOpts{
+			Namespace: metricsNamespace,
+			Subsystem: tunnelSubsystem,
+			Name: "tunnel_authenticate_success",
+			Help: "Count of successful tunnel authenticate",
+		},
+	)
+	prometheus.MustRegister(authSuccess)
+
+	authFail := prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace: metricsNamespace,
+			Subsystem: tunnelSubsystem,
+			Name: "tunnel_authenticate_fail",
+			Help: "Count of tunnel authenticate errors by type",
+		},
+		[]string{"error"},
+	)
+	prometheus.MustRegister(authFail)
+
 	return &TunnelMetrics{
 		haConnections: haConnections,
 		activeStreams: activeStreams,
@@ -474,6 +497,8 @@ func NewTunnelMetrics() *TunnelMetrics {
 		regFail: registerFail,
 		rpcFail: rpcFail,
 		userHostnamesCounts: userHostnamesCounts,
+		authSuccess: authSuccess,
+		authFail: authFail,
 	}
 }
 
diff --git a/origin/supervisor.go b/origin/supervisor.go
index b0a5ade9..a28ce573 100644
--- a/origin/supervisor.go
+++ b/origin/supervisor.go
@@ -354,6 +354,7 @@ func (s *Supervisor) refreshAuth(
 	logger := s.config.Logger.WithField("subsystem", subsystemRefreshAuth)
 	authOutcome, err := authenticate(ctx, backoff.Retries())
 	if err != nil {
+		s.config.Metrics.authFail.WithLabelValues(err.Error()).Inc()
 		if duration, ok := backoff.GetBackoffDuration(ctx); ok {
 			logger.WithError(err).Warnf("Retrying in %v", duration)
 			return backoff.BackoffTimer(), nil
@@ -366,15 +367,20 @@ func (s *Supervisor) refreshAuth(
 	switch outcome := authOutcome.(type) {
 	case tunnelpogs.AuthSuccess:
 		s.SetReconnectToken(outcome.JWT())
+		s.config.Metrics.authSuccess.Inc()
 		return timeAfter(outcome.RefreshAfter()), nil
 	case tunnelpogs.AuthUnknown:
 		duration := outcome.RefreshAfter()
+		s.config.Metrics.authFail.WithLabelValues(outcome.Error()).Inc()
 		logger.WithError(outcome).Warnf("Retrying in %v", duration)
 		return timeAfter(duration), nil
 	case tunnelpogs.AuthFail:
+		s.config.Metrics.authFail.WithLabelValues(outcome.Error()).Inc()
 		return nil, outcome
 	default:
-		return nil, fmt.Errorf("Unexpected outcome type %T", authOutcome)
+		err := fmt.Errorf("Unexpected outcome type %T", authOutcome)
+		s.config.Metrics.authFail.WithLabelValues(err.Error()).Inc()
+		return nil, err
 	}
 }
 
diff --git a/origin/supervisor_test.go b/origin/supervisor_test.go
index 986b326f..5f031221 100644
--- a/origin/supervisor_test.go
+++ b/origin/supervisor_test.go
@@ -8,12 +8,37 @@ import (
 	"time"
 
 	"github.com/google/uuid"
+	"github.com/prometheus/client_golang/prometheus"
 	"github.com/sirupsen/logrus"
 	"github.com/stretchr/testify/assert"
 
 	tunnelpogs "github.com/cloudflare/cloudflared/tunnelrpc/pogs"
 )
 
+func testConfig(logger *logrus.Logger) *TunnelConfig {
+	metrics := TunnelMetrics{}
+
+	metrics.authSuccess = prometheus.NewCounter(
+		prometheus.CounterOpts{
+			Namespace: metricsNamespace,
+			Subsystem: tunnelSubsystem,
+			Name: "tunnel_authenticate_success",
+			Help: "Count of successful tunnel authenticate",
+		},
+	)
+
+	metrics.authFail = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace: metricsNamespace,
+			Subsystem: tunnelSubsystem,
+			Name: "tunnel_authenticate_fail",
+			Help: "Count of tunnel authenticate errors by type",
+		},
+		[]string{"error"},
+	)
+	return &TunnelConfig{Logger: logger, Metrics: &metrics}
+}
+
 func TestRefreshAuthBackoff(t *testing.T) {
 	logger := logrus.New()
 	logger.Level = logrus.ErrorLevel
@@ -24,7 +49,7 @@ func TestRefreshAuthBackoff(t *testing.T) {
 		return time.After(d)
 	}
 
-	s, err := NewSupervisor(&TunnelConfig{Logger: logger}, uuid.New())
+	s, err := NewSupervisor(testConfig(logger), uuid.New())
 	if !assert.NoError(t, err) {
 		t.FailNow()
 	}
@@ -69,7 +94,7 @@ func TestRefreshAuthSuccess(t *testing.T) {
 		return time.After(d)
 	}
 
-	s, err := NewSupervisor(&TunnelConfig{Logger: logger}, uuid.New())
+	s, err := NewSupervisor(testConfig(logger), uuid.New())
 	if !assert.NoError(t, err) {
 		t.FailNow()
 	}
@@ -98,7 +123,7 @@ func TestRefreshAuthUnknown(t *testing.T) {
 		return time.After(d)
 	}
 
-	s, err := NewSupervisor(&TunnelConfig{Logger: logger}, uuid.New())
+	s, err := NewSupervisor(testConfig(logger), uuid.New())
 	if !assert.NoError(t, err) {
 		t.FailNow()
 	}
@@ -121,7 +146,7 @@ func TestRefreshAuthFail(t *testing.T) {
 	logger := logrus.New()
 	logger.Level = logrus.ErrorLevel
 
-	s, err := NewSupervisor(&TunnelConfig{Logger: logger}, uuid.New())
+	s, err := NewSupervisor(testConfig(logger), uuid.New())
 	if !assert.NoError(t, err) {
 		t.FailNow()
 	}