2022-02-07 09:42:07 +00:00
|
|
|
package supervisor
|
2018-05-01 23:45:06 +00:00
|
|
|
|
|
|
|
import (
|
2019-01-10 20:55:44 +00:00
|
|
|
"context"
|
2018-05-01 23:45:06 +00:00
|
|
|
"crypto/tls"
|
|
|
|
"fmt"
|
|
|
|
"net"
|
2020-10-08 10:12:26 +00:00
|
|
|
"runtime/debug"
|
2018-05-01 23:45:06 +00:00
|
|
|
"strings"
|
2019-02-19 17:40:49 +00:00
|
|
|
"sync"
|
2018-05-01 23:45:06 +00:00
|
|
|
"time"
|
|
|
|
|
2019-11-21 17:03:13 +00:00
|
|
|
"github.com/google/uuid"
|
2021-08-17 14:30:02 +00:00
|
|
|
"github.com/lucas-clemente/quic-go"
|
2019-11-21 17:03:13 +00:00
|
|
|
"github.com/pkg/errors"
|
2020-11-25 06:55:13 +00:00
|
|
|
"github.com/rs/zerolog"
|
2019-11-21 17:03:13 +00:00
|
|
|
"golang.org/x/sync/errgroup"
|
|
|
|
|
2019-11-21 18:10:44 +00:00
|
|
|
"github.com/cloudflare/cloudflared/connection"
|
2020-10-08 10:12:26 +00:00
|
|
|
"github.com/cloudflare/cloudflared/edgediscovery"
|
2021-08-06 13:31:22 +00:00
|
|
|
"github.com/cloudflare/cloudflared/edgediscovery/allregions"
|
2018-05-01 23:45:06 +00:00
|
|
|
"github.com/cloudflare/cloudflared/h2mux"
|
2022-02-11 10:49:06 +00:00
|
|
|
"github.com/cloudflare/cloudflared/orchestration"
|
2021-10-15 11:05:54 +00:00
|
|
|
quicpogs "github.com/cloudflare/cloudflared/quic"
|
2021-03-26 04:04:56 +00:00
|
|
|
"github.com/cloudflare/cloudflared/retry"
|
2019-03-04 19:48:56 +00:00
|
|
|
"github.com/cloudflare/cloudflared/signal"
|
2018-05-01 23:45:06 +00:00
|
|
|
"github.com/cloudflare/cloudflared/tunnelrpc"
|
|
|
|
tunnelpogs "github.com/cloudflare/cloudflared/tunnelrpc/pogs"
|
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// dialTimeout is the timeout handed to edgediscovery.DialEdge when dialing the edge.
	dialTimeout = 15 * time.Second

	// Feature names reported to the edge. SupportedFeatures always advertises
	// FeatureSerializedHeaders and adds FeatureQuickReconnects when no named
	// tunnel is configured.
	FeatureSerializedHeaders = "serialized_headers"
	FeatureQuickReconnects = "quick_reconnects"
	// NOTE(review): FeatureAllowRemoteConfig is not referenced by SupportedFeatures;
	// presumably it is consumed elsewhere — confirm.
	FeatureAllowRemoteConfig = "allow_remote_config"
)
|
|
|
|
|
|
|
|
// TunnelConfig carries the settings used to register and serve tunnel
// connections to the edge.
type TunnelConfig struct {
	// GracePeriod is forwarded to the control stream and to the h2mux connection.
	GracePeriod time.Duration
	// ReplaceExisting is copied into the ConnectionOptions sent to the edge.
	ReplaceExisting bool
	// OSArch is reported as the OS field of RegistrationOptions.
	OSArch string
	// ClientID is reported as the ClientID field of RegistrationOptions.
	ClientID string
	CloseConnOnce *sync.Once // Used to close connectedSignal no more than once
	EdgeAddrs []string
	Region string
	// HAConnections and LBPool together decide the ExistingTunnelPolicy:
	// "disconnect" when HAConnections <= 1 and LBPool is empty, "balance" otherwise.
	HAConnections int
	// IncidentLookup is queried for active incidents when the server rejects a registration.
	IncidentLookup IncidentLookup
	IsAutoupdated bool
	LBPool string
	Tags []tunnelpogs.Tag
	// Log is the base logger; ServeTunnelLoop derives a per-connection logger from it.
	Log *zerolog.Logger
	LogTransport *zerolog.Logger
	// Observer receives reconnect and disconnect notifications for each connection.
	Observer *connection.Observer
	// ReportedVersion is reported as the Version field of RegistrationOptions.
	ReportedVersion string
	// Retries is used as MaxRetries of each connection's backoff handler.
	Retries uint
	RunFromTerminal bool

	// NamedTunnel is nil for classic tunnels; ServeH2mux uses it to choose
	// between ServeNamedTunnel and ServeClassicTunnel.
	NamedTunnel *connection.NamedTunnelProperties
	ClassicTunnel *connection.ClassicTunnelProperties
	// MuxerConfig supplies the compression setting and the h2mux configuration.
	MuxerConfig *connection.MuxerConfig
	// ProtocolSelector provides the current and fallback protocols.
	ProtocolSelector connection.ProtocolSelector
	// EdgeTLSConfigs holds the TLS configuration to use for each protocol.
	EdgeTLSConfigs map[connection.Protocol]*tls.Config
}
|
|
|
|
|
2022-02-07 09:42:07 +00:00
|
|
|
func (c *TunnelConfig) registrationOptions(connectionID uint8, OriginLocalIP string, uuid uuid.UUID) *tunnelpogs.RegistrationOptions {
|
2018-05-01 23:45:06 +00:00
|
|
|
policy := tunnelrpc.ExistingTunnelPolicy_balance
|
|
|
|
if c.HAConnections <= 1 && c.LBPool == "" {
|
|
|
|
policy = tunnelrpc.ExistingTunnelPolicy_disconnect
|
|
|
|
}
|
|
|
|
return &tunnelpogs.RegistrationOptions{
|
|
|
|
ClientID: c.ClientID,
|
|
|
|
Version: c.ReportedVersion,
|
2021-03-08 16:46:23 +00:00
|
|
|
OS: c.OSArch,
|
2018-05-01 23:45:06 +00:00
|
|
|
ExistingTunnelPolicy: policy,
|
|
|
|
PoolName: c.LBPool,
|
2020-11-02 11:21:34 +00:00
|
|
|
Tags: c.Tags,
|
2018-05-01 23:45:06 +00:00
|
|
|
ConnectionID: connectionID,
|
|
|
|
OriginLocalIP: OriginLocalIP,
|
|
|
|
IsAutoupdated: c.IsAutoupdated,
|
|
|
|
RunFromTerminal: c.RunFromTerminal,
|
2020-10-08 10:12:26 +00:00
|
|
|
CompressionQuality: uint64(c.MuxerConfig.CompressionSetting),
|
2018-10-08 19:20:28 +00:00
|
|
|
UUID: uuid.String(),
|
2020-03-31 13:59:00 +00:00
|
|
|
Features: c.SupportedFeatures(),
|
2018-05-01 23:45:06 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-07 09:42:07 +00:00
|
|
|
func (c *TunnelConfig) connectionOptions(originLocalAddr string, numPreviousAttempts uint8) *tunnelpogs.ConnectionOptions {
|
2020-06-25 18:25:39 +00:00
|
|
|
// attempt to parse out origin IP, but don't fail since it's informational field
|
|
|
|
host, _, _ := net.SplitHostPort(originLocalAddr)
|
|
|
|
originIP := net.ParseIP(host)
|
|
|
|
|
|
|
|
return &tunnelpogs.ConnectionOptions{
|
2020-07-31 15:22:23 +00:00
|
|
|
Client: c.NamedTunnel.Client,
|
|
|
|
OriginLocalIP: originIP,
|
2022-02-07 09:42:07 +00:00
|
|
|
ReplaceExisting: c.ReplaceExisting,
|
2020-10-08 10:12:26 +00:00
|
|
|
CompressionQuality: uint8(c.MuxerConfig.CompressionSetting),
|
2020-07-31 15:22:23 +00:00
|
|
|
NumPreviousAttempts: numPreviousAttempts,
|
2020-06-25 18:25:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-31 13:59:00 +00:00
|
|
|
func (c *TunnelConfig) SupportedFeatures() []string {
|
2020-06-25 18:25:39 +00:00
|
|
|
features := []string{FeatureSerializedHeaders}
|
|
|
|
if c.NamedTunnel == nil {
|
|
|
|
features = append(features, FeatureQuickReconnects)
|
2020-03-31 13:59:00 +00:00
|
|
|
}
|
2020-06-25 18:25:39 +00:00
|
|
|
return features
|
2020-03-31 13:59:00 +00:00
|
|
|
}
|
|
|
|
|
2021-01-20 19:41:09 +00:00
|
|
|
func StartTunnelDaemon(
|
|
|
|
ctx context.Context,
|
|
|
|
config *TunnelConfig,
|
2022-02-11 10:49:06 +00:00
|
|
|
orchestrator *orchestration.Orchestrator,
|
2021-01-20 19:41:09 +00:00
|
|
|
connectedSignal *signal.Signal,
|
|
|
|
reconnectCh chan ReconnectSignal,
|
2021-02-05 00:07:49 +00:00
|
|
|
graceShutdownC <-chan struct{},
|
2021-01-20 19:41:09 +00:00
|
|
|
) error {
|
2022-02-11 10:49:06 +00:00
|
|
|
s, err := NewSupervisor(config, orchestrator, reconnectCh, graceShutdownC)
|
2019-12-13 23:05:21 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-01-20 19:41:09 +00:00
|
|
|
return s.Run(ctx, connectedSignal)
|
2018-05-01 23:45:06 +00:00
|
|
|
}
|
|
|
|
|
2020-12-28 18:10:01 +00:00
|
|
|
func ServeTunnelLoop(
|
|
|
|
ctx context.Context,
|
2020-08-18 10:14:14 +00:00
|
|
|
credentialManager *reconnectCredentialManager,
|
2018-05-01 23:45:06 +00:00
|
|
|
config *TunnelConfig,
|
2022-02-11 10:49:06 +00:00
|
|
|
orchestrator *orchestration.Orchestrator,
|
2021-08-06 13:31:22 +00:00
|
|
|
addr *allregions.EdgeAddr,
|
2021-11-08 15:43:36 +00:00
|
|
|
connAwareLogger *ConnAwareLogger,
|
2020-10-14 13:42:00 +00:00
|
|
|
connIndex uint8,
|
2019-03-04 19:48:56 +00:00
|
|
|
connectedSignal *signal.Signal,
|
2020-06-17 18:33:55 +00:00
|
|
|
cloudflaredUUID uuid.UUID,
|
2020-04-30 05:02:08 +00:00
|
|
|
reconnectCh chan ReconnectSignal,
|
2021-02-05 00:07:49 +00:00
|
|
|
gracefulShutdownC <-chan struct{},
|
2018-05-01 23:45:06 +00:00
|
|
|
) error {
|
2020-10-08 10:12:26 +00:00
|
|
|
haConnections.Inc()
|
|
|
|
defer haConnections.Dec()
|
|
|
|
|
2021-11-08 15:43:36 +00:00
|
|
|
logger := config.Log.With().Uint8(connection.LogFieldConnIndex, connIndex).Logger()
|
|
|
|
connLog := connAwareLogger.ReplaceLogger(&logger)
|
2020-12-28 18:10:01 +00:00
|
|
|
|
2021-02-05 00:07:49 +00:00
|
|
|
protocolFallback := &protocolFallback{
|
2021-03-26 04:04:56 +00:00
|
|
|
retry.BackoffHandler{MaxRetries: config.Retries},
|
2020-10-14 13:42:00 +00:00
|
|
|
config.ProtocolSelector.Current(),
|
|
|
|
false,
|
|
|
|
}
|
2018-05-01 23:45:06 +00:00
|
|
|
connectedFuse := h2mux.NewBooleanFuse()
|
|
|
|
go func() {
|
|
|
|
if connectedFuse.Await() {
|
2019-03-04 19:48:56 +00:00
|
|
|
connectedSignal.Notify()
|
2018-05-01 23:45:06 +00:00
|
|
|
}
|
|
|
|
}()
|
|
|
|
// Ensure the above goroutine will terminate if we return without connecting
|
|
|
|
defer connectedFuse.Fuse(false)
|
2020-10-14 13:42:00 +00:00
|
|
|
// Each connection to keep its own copy of protocol, because individual connections might fallback
|
|
|
|
// to another protocol when a particular metal doesn't support new protocol
|
2018-05-01 23:45:06 +00:00
|
|
|
for {
|
2019-11-04 11:11:54 +00:00
|
|
|
err, recoverable := ServeTunnel(
|
|
|
|
ctx,
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog,
|
2019-12-06 21:32:15 +00:00
|
|
|
credentialManager,
|
2019-11-04 11:11:54 +00:00
|
|
|
config,
|
2022-02-11 10:49:06 +00:00
|
|
|
orchestrator,
|
2020-10-14 13:42:00 +00:00
|
|
|
addr,
|
|
|
|
connIndex,
|
2019-11-04 11:11:54 +00:00
|
|
|
connectedFuse,
|
2021-02-05 00:07:49 +00:00
|
|
|
protocolFallback,
|
2020-06-17 18:33:55 +00:00
|
|
|
cloudflaredUUID,
|
2020-03-19 15:38:28 +00:00
|
|
|
reconnectCh,
|
2021-02-05 00:07:49 +00:00
|
|
|
protocolFallback.protocol,
|
2021-01-20 19:41:09 +00:00
|
|
|
gracefulShutdownC,
|
2019-11-04 11:11:54 +00:00
|
|
|
)
|
2020-10-14 13:42:00 +00:00
|
|
|
if !recoverable {
|
|
|
|
return err
|
2018-05-01 23:45:06 +00:00
|
|
|
}
|
2020-10-14 13:42:00 +00:00
|
|
|
|
2021-02-05 00:07:49 +00:00
|
|
|
config.Observer.SendReconnect(connIndex)
|
|
|
|
|
2021-02-10 16:42:09 +00:00
|
|
|
duration, ok := protocolFallback.GetMaxBackoffDuration(ctx)
|
2021-02-05 00:07:49 +00:00
|
|
|
if !ok {
|
2020-10-14 13:42:00 +00:00
|
|
|
return err
|
|
|
|
}
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.Logger().Info().Msgf("Retrying connection in up to %s seconds", duration)
|
2021-02-05 00:07:49 +00:00
|
|
|
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
case <-gracefulShutdownC:
|
|
|
|
return nil
|
|
|
|
case <-protocolFallback.BackoffTimer():
|
2021-11-03 12:06:04 +00:00
|
|
|
if !selectNextProtocol(
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.Logger(),
|
2021-11-03 12:06:04 +00:00
|
|
|
protocolFallback,
|
|
|
|
config.ProtocolSelector,
|
TUN-5719: Re-attempt connection to edge with QUIC despite network error when there is no fallback
We have made 2 changes in the past that caused an unexpected edge case:
1. when faced with QUIC "no network activity", give up re-attempts and fall-back
2. when a protocol is chosen explicitly, rather than using auto (the default), do not fallback
The reasoning for 1. was to fallback quickly in situations where the user may not
have chosen QUIC, and simply got it because we auto-chose it (with the TXT DNS record),
but the users' environment does not allow egress via UDP.
The reasoning for 2. was to avoid falling back if the user explicitly chooses a
protocol. E.g., if the user chooses QUIC, she may want to do UDP proxying, so if
we fallback to HTTP2 protocol that will be unexpected since it does not support
UDP (and same applies for HTTP2 falling back to h2mux and TCP proxying).
This PR fixes the edge case that happens when both those changes 1. and 2. are
put together: when faced with a QUIC "no network activity", we should only try
to fallback if there is a possible fallback. Otherwise, we should exhaust the
retries as normal.
2022-01-27 22:12:25 +00:00
|
|
|
err,
|
2021-11-03 12:06:04 +00:00
|
|
|
) {
|
2021-02-05 00:07:49 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2020-10-14 13:42:00 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-05 00:07:49 +00:00
|
|
|
// protocolFallback is a wrapper around backoffHandler that will try fallback option when backoff reaches
// max retries
type protocolFallback struct {
	retry.BackoffHandler
	// protocol is the protocol this connection will use for its next attempt.
	protocol connection.Protocol
	// inFallback is set once fallback() has switched protocol and cleared by reset().
	inFallback bool
}

// reset resets the embedded backoff handler and clears the in-fallback flag.
// The protocol itself is left unchanged.
func (pf *protocolFallback) reset() {
	pf.ResetNow()
	pf.inFallback = false
}

// fallback resets the embedded backoff handler, switches to the given
// protocol and marks the connection as being in fallback.
func (pf *protocolFallback) fallback(fallback connection.Protocol) {
	pf.ResetNow()
	pf.protocol = fallback
	pf.inFallback = true
}
|
|
|
|
|
2021-02-05 00:07:49 +00:00
|
|
|
// selectNextProtocol picks connection protocol for the next retry iteration,
|
|
|
|
// returns true if it was able to pick the protocol, false if we are out of options and should stop retrying
|
|
|
|
func selectNextProtocol(
|
|
|
|
connLog *zerolog.Logger,
|
|
|
|
protocolBackoff *protocolFallback,
|
|
|
|
selector connection.ProtocolSelector,
|
TUN-5719: Re-attempt connection to edge with QUIC despite network error when there is no fallback
We have made 2 changes in the past that caused an unexpected edge case:
1. when faced with QUIC "no network activity", give up re-attempts and fall-back
2. when a protocol is chosen explicitly, rather than using auto (the default), do not fallback
The reasoning for 1. was to fallback quickly in situations where the user may not
have chosen QUIC, and simply got it because we auto-chose it (with the TXT DNS record),
but the users' environment does not allow egress via UDP.
The reasoning for 2. was to avoid falling back if the user explicitly chooses a
protocol. E.g., if the user chooses QUIC, she may want to do UDP proxying, so if
we fallback to HTTP2 protocol that will be unexpected since it does not support
UDP (and same applies for HTTP2 falling back to h2mux and TCP proxying).
This PR fixes the edge case that happens when both those changes 1. and 2. are
put together: when faced with a QUIC "no network activity", we should only try
to fallback if there is a possible fallback. Otherwise, we should exhaust the
retries as normal.
2022-01-27 22:12:25 +00:00
|
|
|
cause error,
|
2021-02-05 00:07:49 +00:00
|
|
|
) bool {
|
TUN-5719: Re-attempt connection to edge with QUIC despite network error when there is no fallback
We have made 2 changes in the past that caused an unexpected edge case:
1. when faced with QUIC "no network activity", give up re-attempts and fall-back
2. when a protocol is chosen explicitly, rather than using auto (the default), do not fallback
The reasoning for 1. was to fallback quickly in situations where the user may not
have chosen QUIC, and simply got it because we auto-chose it (with the TXT DNS record),
but the users' environment does not allow egress via UDP.
The reasoning for 2. was to avoid falling back if the user explicitly chooses a
protocol. E.g., if the user chooses QUIC, she may want to do UDP proxying, so if
we fallback to HTTP2 protocol that will be unexpected since it does not support
UDP (and same applies for HTTP2 falling back to h2mux and TCP proxying).
This PR fixes the edge case that happens when both those changes 1. and 2. are
put together: when faced with a QUIC "no network activity", we should only try
to fallback if there is a possible fallback. Otherwise, we should exhaust the
retries as normal.
2022-01-27 22:12:25 +00:00
|
|
|
var idleTimeoutError *quic.IdleTimeoutError
|
|
|
|
isNetworkActivityTimeout := errors.As(cause, &idleTimeoutError)
|
|
|
|
_, hasFallback := selector.Fallback()
|
|
|
|
|
|
|
|
if protocolBackoff.ReachedMaxRetries() || (hasFallback && isNetworkActivityTimeout) {
|
2021-02-05 00:07:49 +00:00
|
|
|
fallback, hasFallback := selector.Fallback()
|
2020-10-14 13:42:00 +00:00
|
|
|
if !hasFallback {
|
2021-02-05 00:07:49 +00:00
|
|
|
return false
|
2020-10-14 13:42:00 +00:00
|
|
|
}
|
|
|
|
// Already using fallback protocol, no point to retry
|
2021-02-05 00:07:49 +00:00
|
|
|
if protocolBackoff.protocol == fallback {
|
|
|
|
return false
|
2020-10-14 13:42:00 +00:00
|
|
|
}
|
2021-02-05 00:07:49 +00:00
|
|
|
connLog.Info().Msgf("Switching to fallback protocol %s", fallback)
|
|
|
|
protocolBackoff.fallback(fallback)
|
|
|
|
} else if !protocolBackoff.inFallback {
|
|
|
|
current := selector.Current()
|
|
|
|
if protocolBackoff.protocol != current {
|
|
|
|
protocolBackoff.protocol = current
|
|
|
|
connLog.Info().Msgf("Changing protocol to %s", current)
|
2020-10-14 13:42:00 +00:00
|
|
|
}
|
|
|
|
}
|
2021-02-05 00:07:49 +00:00
|
|
|
return true
|
2018-05-01 23:45:06 +00:00
|
|
|
}
|
|
|
|
|
2021-01-27 13:19:37 +00:00
|
|
|
// ServeTunnel runs a single tunnel connection, returns nil on graceful shutdown,
|
|
|
|
// on error returns a flag indicating if error can be retried
|
2018-05-01 23:45:06 +00:00
|
|
|
func ServeTunnel(
|
|
|
|
ctx context.Context,
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog *ConnAwareLogger,
|
2020-08-18 10:14:14 +00:00
|
|
|
credentialManager *reconnectCredentialManager,
|
2018-05-01 23:45:06 +00:00
|
|
|
config *TunnelConfig,
|
2022-02-11 10:49:06 +00:00
|
|
|
orchestrator *orchestration.Orchestrator,
|
2021-08-06 13:31:22 +00:00
|
|
|
addr *allregions.EdgeAddr,
|
2020-10-14 13:42:00 +00:00
|
|
|
connIndex uint8,
|
2020-10-08 10:12:26 +00:00
|
|
|
fuse *h2mux.BooleanFuse,
|
2021-02-05 00:07:49 +00:00
|
|
|
backoff *protocolFallback,
|
2020-06-17 18:33:55 +00:00
|
|
|
cloudflaredUUID uuid.UUID,
|
2020-04-30 05:02:08 +00:00
|
|
|
reconnectCh chan ReconnectSignal,
|
2020-10-14 13:42:00 +00:00
|
|
|
protocol connection.Protocol,
|
2021-02-05 00:07:49 +00:00
|
|
|
gracefulShutdownC <-chan struct{},
|
2018-05-01 23:45:06 +00:00
|
|
|
) (err error, recoverable bool) {
|
|
|
|
// Treat panics as recoverable errors
|
|
|
|
defer func() {
|
|
|
|
if r := recover(); r != nil {
|
|
|
|
var ok bool
|
|
|
|
err, ok = r.(error)
|
|
|
|
if !ok {
|
|
|
|
err = fmt.Errorf("ServeTunnel: %v", r)
|
|
|
|
}
|
2020-10-08 10:12:26 +00:00
|
|
|
err = errors.Wrapf(err, "stack trace: %s", string(debug.Stack()))
|
2018-05-01 23:45:06 +00:00
|
|
|
recoverable = true
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2020-11-30 20:05:37 +00:00
|
|
|
defer config.Observer.SendDisconnect(connIndex)
|
2021-09-21 06:11:36 +00:00
|
|
|
err, recoverable = serveTunnel(
|
|
|
|
ctx,
|
|
|
|
connLog,
|
|
|
|
credentialManager,
|
|
|
|
config,
|
2022-02-11 10:49:06 +00:00
|
|
|
orchestrator,
|
2021-09-21 06:11:36 +00:00
|
|
|
addr,
|
|
|
|
connIndex,
|
|
|
|
fuse,
|
|
|
|
backoff,
|
|
|
|
cloudflaredUUID,
|
|
|
|
reconnectCh,
|
|
|
|
protocol,
|
|
|
|
gracefulShutdownC,
|
|
|
|
)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
switch err := err.(type) {
|
|
|
|
case connection.DupConnRegisterTunnelError:
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.ConnAwareLogger().Err(err).Msg("Unable to establish connection.")
|
2021-09-21 06:11:36 +00:00
|
|
|
// don't retry this connection anymore, let supervisor pick a new address
|
|
|
|
return err, false
|
|
|
|
case connection.ServerRegisterTunnelError:
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.ConnAwareLogger().Err(err).Msg("Register tunnel error from server side")
|
2021-09-21 06:11:36 +00:00
|
|
|
// Don't send registration error return from server to Sentry. They are
|
|
|
|
// logged on server side
|
|
|
|
if incidents := config.IncidentLookup.ActiveIncidents(); len(incidents) > 0 {
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.ConnAwareLogger().Msg(activeIncidentsMsg(incidents))
|
2021-09-21 06:11:36 +00:00
|
|
|
}
|
|
|
|
return err.Cause, !err.Permanent
|
|
|
|
case ReconnectSignal:
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.Logger().Info().
|
2021-09-21 06:11:36 +00:00
|
|
|
Uint8(connection.LogFieldConnIndex, connIndex).
|
|
|
|
Msgf("Restarting connection due to reconnect signal in %s", err.Delay)
|
|
|
|
err.DelayBeforeReconnect()
|
|
|
|
return err, true
|
|
|
|
default:
|
|
|
|
if err == context.Canceled {
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.Logger().Debug().Err(err).Msgf("Serve tunnel error")
|
2021-09-21 06:11:36 +00:00
|
|
|
return err, false
|
|
|
|
}
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.ConnAwareLogger().Err(err).Msgf("Serve tunnel error")
|
2021-09-21 06:11:36 +00:00
|
|
|
_, permanent := err.(unrecoverableError)
|
|
|
|
return err, !permanent
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
|
|
|
|
func serveTunnel(
|
|
|
|
ctx context.Context,
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog *ConnAwareLogger,
|
2021-09-21 06:11:36 +00:00
|
|
|
credentialManager *reconnectCredentialManager,
|
|
|
|
config *TunnelConfig,
|
2022-02-11 10:49:06 +00:00
|
|
|
orchestrator *orchestration.Orchestrator,
|
2021-09-21 06:11:36 +00:00
|
|
|
addr *allregions.EdgeAddr,
|
|
|
|
connIndex uint8,
|
|
|
|
fuse *h2mux.BooleanFuse,
|
|
|
|
backoff *protocolFallback,
|
|
|
|
cloudflaredUUID uuid.UUID,
|
|
|
|
reconnectCh chan ReconnectSignal,
|
|
|
|
protocol connection.Protocol,
|
|
|
|
gracefulShutdownC <-chan struct{},
|
|
|
|
) (err error, recoverable bool) {
|
2021-08-17 14:30:02 +00:00
|
|
|
connectedFuse := &connectedFuse{
|
|
|
|
fuse: fuse,
|
|
|
|
backoff: backoff,
|
|
|
|
}
|
|
|
|
controlStream := connection.NewControlStream(
|
|
|
|
config.Observer,
|
|
|
|
connectedFuse,
|
|
|
|
config.NamedTunnel,
|
|
|
|
connIndex,
|
|
|
|
nil,
|
|
|
|
gracefulShutdownC,
|
2022-02-07 09:42:07 +00:00
|
|
|
config.GracePeriod,
|
2021-08-17 14:30:02 +00:00
|
|
|
)
|
|
|
|
|
2021-09-21 06:11:36 +00:00
|
|
|
switch protocol {
|
2021-10-11 10:31:05 +00:00
|
|
|
case connection.QUIC, connection.QUICWarp:
|
2022-02-07 09:42:07 +00:00
|
|
|
connOptions := config.connectionOptions(addr.UDP.String(), uint8(backoff.Retries()))
|
2021-08-17 14:30:02 +00:00
|
|
|
return ServeQUIC(ctx,
|
|
|
|
addr.UDP,
|
|
|
|
config,
|
2022-02-11 10:49:06 +00:00
|
|
|
orchestrator,
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog,
|
2021-08-17 14:30:02 +00:00
|
|
|
connOptions,
|
|
|
|
controlStream,
|
2021-10-15 11:05:54 +00:00
|
|
|
connIndex,
|
2021-08-17 14:30:02 +00:00
|
|
|
reconnectCh,
|
|
|
|
gracefulShutdownC)
|
|
|
|
|
2021-10-11 10:31:05 +00:00
|
|
|
case connection.HTTP2, connection.HTTP2Warp:
|
2021-09-21 06:11:36 +00:00
|
|
|
edgeConn, err := edgediscovery.DialEdge(ctx, dialTimeout, config.EdgeTLSConfigs[protocol], addr.TCP)
|
|
|
|
if err != nil {
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.ConnAwareLogger().Err(err).Msg("Unable to establish connection with Cloudflare edge")
|
2021-09-21 06:11:36 +00:00
|
|
|
return err, true
|
|
|
|
}
|
2021-01-27 13:19:37 +00:00
|
|
|
|
2022-02-07 09:42:07 +00:00
|
|
|
connOptions := config.connectionOptions(edgeConn.LocalAddr().String(), uint8(backoff.Retries()))
|
2021-09-21 06:11:36 +00:00
|
|
|
if err := ServeHTTP2(
|
2021-01-20 19:41:09 +00:00
|
|
|
ctx,
|
2021-02-22 15:30:27 +00:00
|
|
|
connLog,
|
2021-01-20 19:41:09 +00:00
|
|
|
config,
|
2022-02-11 10:49:06 +00:00
|
|
|
orchestrator,
|
2021-01-20 19:41:09 +00:00
|
|
|
edgeConn,
|
|
|
|
connOptions,
|
2021-08-17 14:30:02 +00:00
|
|
|
controlStream,
|
2021-01-20 19:41:09 +00:00
|
|
|
connIndex,
|
|
|
|
gracefulShutdownC,
|
2021-08-17 14:30:02 +00:00
|
|
|
reconnectCh,
|
2021-09-21 06:11:36 +00:00
|
|
|
); err != nil {
|
|
|
|
return err, false
|
|
|
|
}
|
|
|
|
|
|
|
|
default:
|
|
|
|
edgeConn, err := edgediscovery.DialEdge(ctx, dialTimeout, config.EdgeTLSConfigs[protocol], addr.TCP)
|
|
|
|
if err != nil {
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.ConnAwareLogger().Err(err).Msg("Unable to establish connection with Cloudflare edge")
|
2021-09-21 06:11:36 +00:00
|
|
|
return err, true
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := ServeH2mux(
|
2021-01-27 13:19:37 +00:00
|
|
|
ctx,
|
2021-02-22 15:30:27 +00:00
|
|
|
connLog,
|
2021-01-27 13:19:37 +00:00
|
|
|
credentialManager,
|
|
|
|
config,
|
2022-02-11 10:49:06 +00:00
|
|
|
orchestrator,
|
2021-01-27 13:19:37 +00:00
|
|
|
edgeConn,
|
|
|
|
connIndex,
|
|
|
|
connectedFuse,
|
|
|
|
cloudflaredUUID,
|
|
|
|
reconnectCh,
|
|
|
|
gracefulShutdownC,
|
2021-09-21 06:11:36 +00:00
|
|
|
); err != nil {
|
2021-01-27 13:19:37 +00:00
|
|
|
return err, false
|
|
|
|
}
|
|
|
|
}
|
2021-09-21 06:11:36 +00:00
|
|
|
return
|
2021-01-27 13:19:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// unrecoverableError wraps an error that must not be retried. ServeTunnel
// type-asserts for it to decide whether a failure is permanent.
type unrecoverableError struct {
	err error
}

// Error returns the message of the wrapped error.
func (r unrecoverableError) Error() string {
	return r.err.Error()
}

// Unwrap returns the wrapped error so that errors.Is and errors.As can
// inspect the underlying cause through the wrapper.
func (r unrecoverableError) Unwrap() error {
	return r.err
}
|
2020-10-08 09:48:10 +00:00
|
|
|
|
2020-10-08 10:12:26 +00:00
|
|
|
func ServeH2mux(
|
|
|
|
ctx context.Context,
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog *ConnAwareLogger,
|
2020-10-08 10:12:26 +00:00
|
|
|
credentialManager *reconnectCredentialManager,
|
|
|
|
config *TunnelConfig,
|
2022-02-11 10:49:06 +00:00
|
|
|
orchestrator *orchestration.Orchestrator,
|
2020-10-08 10:12:26 +00:00
|
|
|
edgeConn net.Conn,
|
2020-12-28 18:10:01 +00:00
|
|
|
connIndex uint8,
|
2020-10-08 10:12:26 +00:00
|
|
|
connectedFuse *connectedFuse,
|
|
|
|
cloudflaredUUID uuid.UUID,
|
|
|
|
reconnectCh chan ReconnectSignal,
|
2021-02-05 00:07:49 +00:00
|
|
|
gracefulShutdownC <-chan struct{},
|
2021-01-27 13:19:37 +00:00
|
|
|
) error {
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.Logger().Debug().Msgf("Connecting via h2mux")
|
2018-05-01 23:45:06 +00:00
|
|
|
// Returns error from parsing the origin URL or handshake errors
|
2020-11-25 06:55:13 +00:00
|
|
|
handler, err, recoverable := connection.NewH2muxConnection(
|
2022-02-11 10:49:06 +00:00
|
|
|
orchestrator,
|
2022-02-07 09:42:07 +00:00
|
|
|
config.GracePeriod,
|
2020-11-25 06:55:13 +00:00
|
|
|
config.MuxerConfig,
|
|
|
|
edgeConn,
|
2020-12-28 18:10:01 +00:00
|
|
|
connIndex,
|
2020-11-25 06:55:13 +00:00
|
|
|
config.Observer,
|
2021-01-20 19:41:09 +00:00
|
|
|
gracefulShutdownC,
|
2020-11-25 06:55:13 +00:00
|
|
|
)
|
2018-05-01 23:45:06 +00:00
|
|
|
if err != nil {
|
2021-01-27 13:19:37 +00:00
|
|
|
if !recoverable {
|
|
|
|
return unrecoverableError{err}
|
|
|
|
}
|
|
|
|
return err
|
2018-05-01 23:45:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
errGroup, serveCtx := errgroup.WithContext(ctx)
|
|
|
|
|
2021-01-27 13:19:37 +00:00
|
|
|
errGroup.Go(func() error {
|
2020-10-08 10:12:26 +00:00
|
|
|
if config.NamedTunnel != nil {
|
2022-02-07 09:42:07 +00:00
|
|
|
connOptions := config.connectionOptions(edgeConn.LocalAddr().String(), uint8(connectedFuse.backoff.Retries()))
|
2021-01-20 19:41:09 +00:00
|
|
|
return handler.ServeNamedTunnel(serveCtx, config.NamedTunnel, connOptions, connectedFuse)
|
2018-05-01 23:45:06 +00:00
|
|
|
}
|
2022-02-07 09:42:07 +00:00
|
|
|
registrationOptions := config.registrationOptions(connIndex, edgeConn.LocalAddr().String(), cloudflaredUUID)
|
2020-10-14 13:42:00 +00:00
|
|
|
return handler.ServeClassicTunnel(serveCtx, config.ClassicTunnel, credentialManager, registrationOptions, connectedFuse)
|
2018-05-01 23:45:06 +00:00
|
|
|
})
|
|
|
|
|
2021-01-27 13:19:37 +00:00
|
|
|
errGroup.Go(func() error {
|
|
|
|
return listenReconnect(serveCtx, reconnectCh, gracefulShutdownC)
|
|
|
|
})
|
2018-05-01 23:45:06 +00:00
|
|
|
|
2021-01-27 13:19:37 +00:00
|
|
|
return errGroup.Wait()
|
2018-05-01 23:45:06 +00:00
|
|
|
}
|
|
|
|
|
2020-10-08 10:12:26 +00:00
|
|
|
func ServeHTTP2(
|
2020-09-11 22:02:34 +00:00
|
|
|
ctx context.Context,
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog *ConnAwareLogger,
|
2020-09-11 22:02:34 +00:00
|
|
|
config *TunnelConfig,
|
2022-02-11 10:49:06 +00:00
|
|
|
orchestrator *orchestration.Orchestrator,
|
2020-10-08 10:12:26 +00:00
|
|
|
tlsServerConn net.Conn,
|
|
|
|
connOptions *tunnelpogs.ConnectionOptions,
|
2021-08-17 14:30:02 +00:00
|
|
|
controlStreamHandler connection.ControlStreamHandler,
|
2020-09-25 13:12:53 +00:00
|
|
|
connIndex uint8,
|
2021-02-05 00:07:49 +00:00
|
|
|
gracefulShutdownC <-chan struct{},
|
2021-08-17 14:30:02 +00:00
|
|
|
reconnectCh chan ReconnectSignal,
|
2021-01-27 13:19:37 +00:00
|
|
|
) error {
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.Logger().Debug().Msgf("Connecting via http2")
|
2021-01-20 19:41:09 +00:00
|
|
|
h2conn := connection.NewHTTP2Connection(
|
2020-11-25 06:55:13 +00:00
|
|
|
tlsServerConn,
|
2022-02-11 10:49:06 +00:00
|
|
|
orchestrator,
|
2020-11-25 06:55:13 +00:00
|
|
|
connOptions,
|
|
|
|
config.Observer,
|
|
|
|
connIndex,
|
2021-08-17 14:30:02 +00:00
|
|
|
controlStreamHandler,
|
2021-07-16 15:14:37 +00:00
|
|
|
config.Log,
|
2020-11-25 06:55:13 +00:00
|
|
|
)
|
2020-09-11 22:02:34 +00:00
|
|
|
|
2020-09-25 13:12:53 +00:00
|
|
|
errGroup, serveCtx := errgroup.WithContext(ctx)
|
|
|
|
errGroup.Go(func() error {
|
2021-01-20 19:41:09 +00:00
|
|
|
return h2conn.Serve(serveCtx)
|
2020-09-25 13:12:53 +00:00
|
|
|
})
|
|
|
|
|
2021-01-27 13:19:37 +00:00
|
|
|
errGroup.Go(func() error {
|
|
|
|
err := listenReconnect(serveCtx, reconnectCh, gracefulShutdownC)
|
|
|
|
if err != nil {
|
|
|
|
// forcefully break the connection (this is only used for testing)
|
|
|
|
_ = tlsServerConn.Close()
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
})
|
2020-09-11 22:02:34 +00:00
|
|
|
|
2021-01-27 13:19:37 +00:00
|
|
|
return errGroup.Wait()
|
2020-09-11 22:02:34 +00:00
|
|
|
}
|
|
|
|
|
2021-08-17 14:30:02 +00:00
|
|
|
func ServeQUIC(
|
|
|
|
ctx context.Context,
|
|
|
|
edgeAddr *net.UDPAddr,
|
|
|
|
config *TunnelConfig,
|
2022-02-11 10:49:06 +00:00
|
|
|
orchestrator *orchestration.Orchestrator,
|
2021-11-08 15:43:36 +00:00
|
|
|
connLogger *ConnAwareLogger,
|
2021-08-17 14:30:02 +00:00
|
|
|
connOptions *tunnelpogs.ConnectionOptions,
|
|
|
|
controlStreamHandler connection.ControlStreamHandler,
|
2021-10-15 11:05:54 +00:00
|
|
|
connIndex uint8,
|
2021-08-17 14:30:02 +00:00
|
|
|
reconnectCh chan ReconnectSignal,
|
|
|
|
gracefulShutdownC <-chan struct{},
|
|
|
|
) (err error, recoverable bool) {
|
|
|
|
tlsConfig := config.EdgeTLSConfigs[connection.QUIC]
|
|
|
|
quicConfig := &quic.Config{
|
TUN-5621: Correctly manage QUIC stream closing
Until this PR, we were naively closing the quic.Stream whenever
the callstack for handling the request (HTTP or TCP) finished.
However, our proxy handler may still be reading or writing from
the quic.Stream at that point, because we return the callstack if
either side finishes, but not necessarily both.
This is a problem for quic-go library because quic.Stream#Close
cannot be called concurrently with quic.Stream#Write
Furthermore, we also noticed that quic.Stream#Close does nothing
to do receiving stream (since, underneath, quic.Stream has 2 streams,
1 for each direction), thus leaking memory, as explained in:
https://github.com/lucas-clemente/quic-go/issues/3322
This PR addresses both problems by wrapping the quic.Stream that
is passed down to the proxying logic and handle all these concerns.
2022-01-27 22:37:45 +00:00
|
|
|
HandshakeIdleTimeout: quicpogs.HandshakeIdleTimeout,
|
|
|
|
MaxIdleTimeout: quicpogs.MaxIdleTimeout,
|
2021-10-08 12:48:20 +00:00
|
|
|
MaxIncomingStreams: connection.MaxConcurrentStreams,
|
|
|
|
MaxIncomingUniStreams: connection.MaxConcurrentStreams,
|
|
|
|
KeepAlive: true,
|
2021-11-12 09:37:28 +00:00
|
|
|
EnableDatagrams: true,
|
2022-01-06 12:17:10 +00:00
|
|
|
MaxDatagramFrameSize: quicpogs.MaxDatagramFrameSize,
|
2021-11-08 15:43:36 +00:00
|
|
|
Tracer: quicpogs.NewClientTracer(connLogger.Logger(), connIndex),
|
2021-08-17 14:30:02 +00:00
|
|
|
}
|
|
|
|
|
2022-01-05 16:01:56 +00:00
|
|
|
quicConn, err := connection.NewQUICConnection(
|
|
|
|
quicConfig,
|
|
|
|
edgeAddr,
|
|
|
|
tlsConfig,
|
2022-02-11 10:49:06 +00:00
|
|
|
orchestrator,
|
2022-01-05 16:01:56 +00:00
|
|
|
connOptions,
|
|
|
|
controlStreamHandler,
|
|
|
|
connLogger.Logger())
|
|
|
|
if err != nil {
|
|
|
|
connLogger.ConnAwareLogger().Err(err).Msgf("Failed to create new quic connection")
|
|
|
|
return err, true
|
|
|
|
}
|
2021-08-17 14:30:02 +00:00
|
|
|
|
2022-01-05 16:01:56 +00:00
|
|
|
errGroup, serveCtx := errgroup.WithContext(ctx)
|
|
|
|
errGroup.Go(func() error {
|
|
|
|
err := quicConn.Serve(serveCtx)
|
|
|
|
if err != nil {
|
|
|
|
connLogger.ConnAwareLogger().Err(err).Msg("Failed to serve quic connection")
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
})
|
2021-08-17 14:30:02 +00:00
|
|
|
|
2022-01-05 16:01:56 +00:00
|
|
|
errGroup.Go(func() error {
|
|
|
|
err := listenReconnect(serveCtx, reconnectCh, gracefulShutdownC)
|
|
|
|
if err != nil {
|
|
|
|
// forcefully break the connection (this is only used for testing)
|
|
|
|
quicConn.Close()
|
2021-08-17 14:30:02 +00:00
|
|
|
}
|
2022-01-05 16:01:56 +00:00
|
|
|
return err
|
|
|
|
})
|
|
|
|
|
|
|
|
return errGroup.Wait(), false
|
2021-08-17 14:30:02 +00:00
|
|
|
}
|
|
|
|
|
2021-02-05 00:07:49 +00:00
|
|
|
func listenReconnect(ctx context.Context, reconnectCh <-chan ReconnectSignal, gracefulShutdownCh <-chan struct{}) error {
|
2021-01-27 13:19:37 +00:00
|
|
|
select {
|
|
|
|
case reconnect := <-reconnectCh:
|
|
|
|
return reconnect
|
|
|
|
case <-gracefulShutdownCh:
|
|
|
|
return nil
|
|
|
|
case <-ctx.Done():
|
|
|
|
return nil
|
2018-10-08 19:20:28 +00:00
|
|
|
}
|
2019-04-25 23:13:06 +00:00
|
|
|
}
|
|
|
|
|
2020-10-08 10:12:26 +00:00
|
|
|
// connectedFuse pairs a connection's boolean fuse with its protocol backoff
// state, so that marking the connection as connected also resets the backoff.
type connectedFuse struct {
	fuse *h2mux.BooleanFuse
	backoff *protocolFallback
}

// Connected sets the fuse to true and resets the backoff state.
func (cf *connectedFuse) Connected() {
	cf.fuse.Fuse(true)
	cf.backoff.reset()
}

// IsConnected returns the current value of the fuse.
func (cf *connectedFuse) IsConnected() bool {
	return cf.fuse.Value()
}
|
2019-01-10 20:55:44 +00:00
|
|
|
|
|
|
|
func activeIncidentsMsg(incidents []Incident) string {
|
|
|
|
preamble := "There is an active Cloudflare incident that may be related:"
|
|
|
|
if len(incidents) > 1 {
|
|
|
|
preamble = "There are active Cloudflare incidents that may be related:"
|
|
|
|
}
|
|
|
|
incidentStrings := []string{}
|
|
|
|
for _, incident := range incidents {
|
|
|
|
incidentString := fmt.Sprintf("%s (%s)", incident.Name, incident.URL())
|
|
|
|
incidentStrings = append(incidentStrings, incidentString)
|
|
|
|
}
|
|
|
|
return preamble + " " + strings.Join(incidentStrings, "; ")
|
|
|
|
}
|