2022-02-07 09:42:07 +00:00
|
|
|
package supervisor
|
2018-05-01 23:45:06 +00:00
|
|
|
|
|
|
|
import (
|
2019-01-10 20:55:44 +00:00
|
|
|
"context"
|
2018-05-01 23:45:06 +00:00
|
|
|
"crypto/tls"
|
|
|
|
"fmt"
|
|
|
|
"net"
|
2020-10-08 10:12:26 +00:00
|
|
|
"runtime/debug"
|
2018-05-01 23:45:06 +00:00
|
|
|
"strings"
|
2019-02-19 17:40:49 +00:00
|
|
|
"sync"
|
2018-05-01 23:45:06 +00:00
|
|
|
"time"
|
|
|
|
|
2019-11-21 17:03:13 +00:00
|
|
|
"github.com/google/uuid"
|
|
|
|
"github.com/pkg/errors"
|
2023-05-06 00:42:41 +00:00
|
|
|
"github.com/quic-go/quic-go"
|
2020-11-25 06:55:13 +00:00
|
|
|
"github.com/rs/zerolog"
|
2019-11-21 17:03:13 +00:00
|
|
|
"golang.org/x/sync/errgroup"
|
|
|
|
|
2019-11-21 18:10:44 +00:00
|
|
|
"github.com/cloudflare/cloudflared/connection"
|
2020-10-08 10:12:26 +00:00
|
|
|
"github.com/cloudflare/cloudflared/edgediscovery"
|
2021-08-06 13:31:22 +00:00
|
|
|
"github.com/cloudflare/cloudflared/edgediscovery/allregions"
|
2023-03-29 20:12:32 +00:00
|
|
|
"github.com/cloudflare/cloudflared/features"
|
2018-05-01 23:45:06 +00:00
|
|
|
"github.com/cloudflare/cloudflared/h2mux"
|
2022-10-13 10:01:25 +00:00
|
|
|
"github.com/cloudflare/cloudflared/ingress"
|
2023-04-12 21:41:11 +00:00
|
|
|
"github.com/cloudflare/cloudflared/management"
|
2022-02-11 10:49:06 +00:00
|
|
|
"github.com/cloudflare/cloudflared/orchestration"
|
2021-10-15 11:05:54 +00:00
|
|
|
quicpogs "github.com/cloudflare/cloudflared/quic"
|
2021-03-26 04:04:56 +00:00
|
|
|
"github.com/cloudflare/cloudflared/retry"
|
2019-03-04 19:48:56 +00:00
|
|
|
"github.com/cloudflare/cloudflared/signal"
|
2018-05-01 23:45:06 +00:00
|
|
|
tunnelpogs "github.com/cloudflare/cloudflared/tunnelrpc/pogs"
|
2024-05-14 04:22:06 +00:00
|
|
|
"github.com/cloudflare/cloudflared/tunnelrpc/proto"
|
2022-08-11 20:31:36 +00:00
|
|
|
"github.com/cloudflare/cloudflared/tunnelstate"
|
2018-05-01 23:45:06 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// dialTimeout is the timeout handed to edgediscovery.DialEdge when dialing
// the edge for an HTTP/2 connection.
const dialTimeout = 15 * time.Second
|
|
|
|
|
|
|
|
type TunnelConfig struct {
|
2022-12-14 11:43:52 +00:00
|
|
|
GracePeriod time.Duration
|
|
|
|
ReplaceExisting bool
|
|
|
|
OSArch string
|
|
|
|
ClientID string
|
|
|
|
CloseConnOnce *sync.Once // Used to close connectedSignal no more than once
|
|
|
|
EdgeAddrs []string
|
|
|
|
Region string
|
|
|
|
EdgeIPVersion allregions.ConfigIPVersion
|
2023-02-28 16:11:42 +00:00
|
|
|
EdgeBindAddr net.IP
|
2022-12-14 11:43:52 +00:00
|
|
|
HAConnections int
|
|
|
|
IsAutoupdated bool
|
|
|
|
LBPool string
|
|
|
|
Tags []tunnelpogs.Tag
|
|
|
|
Log *zerolog.Logger
|
|
|
|
LogTransport *zerolog.Logger
|
|
|
|
Observer *connection.Observer
|
|
|
|
ReportedVersion string
|
|
|
|
Retries uint
|
|
|
|
MaxEdgeAddrRetries uint8
|
|
|
|
RunFromTerminal bool
|
2022-02-07 09:42:07 +00:00
|
|
|
|
2022-08-24 12:33:10 +00:00
|
|
|
NeedPQ bool
|
|
|
|
|
2022-02-07 09:42:07 +00:00
|
|
|
NamedTunnel *connection.NamedTunnelProperties
|
2020-10-14 13:42:00 +00:00
|
|
|
ProtocolSelector connection.ProtocolSelector
|
|
|
|
EdgeTLSConfigs map[connection.Protocol]*tls.Config
|
2022-10-13 10:01:25 +00:00
|
|
|
PacketConfig *ingress.GlobalRouterConfig
|
2023-06-19 16:03:11 +00:00
|
|
|
|
2024-05-14 04:22:06 +00:00
|
|
|
RPCTimeout time.Duration
|
|
|
|
WriteStreamTimeout time.Duration
|
2023-07-12 09:37:19 +00:00
|
|
|
|
|
|
|
DisableQUICPathMTUDiscovery bool
|
2023-08-25 13:39:25 +00:00
|
|
|
|
|
|
|
FeatureSelector *features.FeatureSelector
|
2018-05-01 23:45:06 +00:00
|
|
|
}
|
|
|
|
|
2022-02-07 09:42:07 +00:00
|
|
|
func (c *TunnelConfig) registrationOptions(connectionID uint8, OriginLocalIP string, uuid uuid.UUID) *tunnelpogs.RegistrationOptions {
|
2024-05-14 04:22:06 +00:00
|
|
|
policy := proto.ExistingTunnelPolicy_balance
|
2018-05-01 23:45:06 +00:00
|
|
|
if c.HAConnections <= 1 && c.LBPool == "" {
|
2024-05-14 04:22:06 +00:00
|
|
|
policy = proto.ExistingTunnelPolicy_disconnect
|
2018-05-01 23:45:06 +00:00
|
|
|
}
|
|
|
|
return &tunnelpogs.RegistrationOptions{
|
|
|
|
ClientID: c.ClientID,
|
|
|
|
Version: c.ReportedVersion,
|
2021-03-08 16:46:23 +00:00
|
|
|
OS: c.OSArch,
|
2018-05-01 23:45:06 +00:00
|
|
|
ExistingTunnelPolicy: policy,
|
|
|
|
PoolName: c.LBPool,
|
2020-11-02 11:21:34 +00:00
|
|
|
Tags: c.Tags,
|
2018-05-01 23:45:06 +00:00
|
|
|
ConnectionID: connectionID,
|
|
|
|
OriginLocalIP: OriginLocalIP,
|
|
|
|
IsAutoupdated: c.IsAutoupdated,
|
|
|
|
RunFromTerminal: c.RunFromTerminal,
|
2023-03-07 21:51:37 +00:00
|
|
|
CompressionQuality: 0,
|
2018-10-08 19:20:28 +00:00
|
|
|
UUID: uuid.String(),
|
2020-03-31 13:59:00 +00:00
|
|
|
Features: c.SupportedFeatures(),
|
2018-05-01 23:45:06 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-07 09:42:07 +00:00
|
|
|
func (c *TunnelConfig) connectionOptions(originLocalAddr string, numPreviousAttempts uint8) *tunnelpogs.ConnectionOptions {
|
2020-06-25 18:25:39 +00:00
|
|
|
// attempt to parse out origin IP, but don't fail since it's informational field
|
|
|
|
host, _, _ := net.SplitHostPort(originLocalAddr)
|
|
|
|
originIP := net.ParseIP(host)
|
|
|
|
|
|
|
|
return &tunnelpogs.ConnectionOptions{
|
2020-07-31 15:22:23 +00:00
|
|
|
Client: c.NamedTunnel.Client,
|
|
|
|
OriginLocalIP: originIP,
|
2022-02-07 09:42:07 +00:00
|
|
|
ReplaceExisting: c.ReplaceExisting,
|
2023-03-07 21:51:37 +00:00
|
|
|
CompressionQuality: 0,
|
2020-07-31 15:22:23 +00:00
|
|
|
NumPreviousAttempts: numPreviousAttempts,
|
2020-06-25 18:25:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-31 13:59:00 +00:00
|
|
|
func (c *TunnelConfig) SupportedFeatures() []string {
|
2023-03-29 20:12:32 +00:00
|
|
|
supported := []string{features.FeatureSerializedHeaders}
|
2020-06-25 18:25:39 +00:00
|
|
|
if c.NamedTunnel == nil {
|
2023-03-29 20:12:32 +00:00
|
|
|
supported = append(supported, features.FeatureQuickReconnects)
|
2020-03-31 13:59:00 +00:00
|
|
|
}
|
2023-03-29 20:12:32 +00:00
|
|
|
return supported
|
2020-03-31 13:59:00 +00:00
|
|
|
}
|
|
|
|
|
2021-01-20 19:41:09 +00:00
|
|
|
func StartTunnelDaemon(
|
|
|
|
ctx context.Context,
|
|
|
|
config *TunnelConfig,
|
2022-02-11 10:49:06 +00:00
|
|
|
orchestrator *orchestration.Orchestrator,
|
2021-01-20 19:41:09 +00:00
|
|
|
connectedSignal *signal.Signal,
|
|
|
|
reconnectCh chan ReconnectSignal,
|
2021-02-05 00:07:49 +00:00
|
|
|
graceShutdownC <-chan struct{},
|
2021-01-20 19:41:09 +00:00
|
|
|
) error {
|
2022-02-11 10:49:06 +00:00
|
|
|
s, err := NewSupervisor(config, orchestrator, reconnectCh, graceShutdownC)
|
2019-12-13 23:05:21 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-01-20 19:41:09 +00:00
|
|
|
return s.Run(ctx, connectedSignal)
|
2018-05-01 23:45:06 +00:00
|
|
|
}
|
|
|
|
|
2022-12-14 11:43:52 +00:00
|
|
|
// ConnectivityError marks a failure to connect to the edge and records whether
// the retry budget had already been used up when it was created.
type ConnectivityError struct {
	reachedMaxRetries bool
}

// NewConnectivityError returns a ConnectivityError carrying the given
// max-retries flag.
func NewConnectivityError(hasReachedMaxRetries bool) *ConnectivityError {
	connErr := new(ConnectivityError)
	connErr.reachedMaxRetries = hasReachedMaxRetries
	return connErr
}

// Error implements the error interface; the message includes the max-retries flag.
func (e *ConnectivityError) Error() string {
	reached := e.HasReachedMaxRetries()
	return fmt.Sprintf("connectivity error - reached max retries: %t", reached)
}

// HasReachedMaxRetries reports the flag the error was created with.
func (e *ConnectivityError) HasReachedMaxRetries() bool {
	return e.reachedMaxRetries
}
|
|
|
|
|
2022-05-20 21:51:36 +00:00
|
|
|
// EdgeAddrHandler provides a mechanism to switch between behaviors for
// handling the errors returned by attempts to connect to the edge.
type EdgeAddrHandler interface {
	// ShouldGetNewAddress inspects the error a connection attempt ended with.
	// needsNewAddress reports whether the edge address of connIndex should be
	// replaced with a new one. connectivityError is non-nil when the error
	// should be treated as a connectivity error rather than a general
	// application error.
	ShouldGetNewAddress(connIndex uint8, err error) (needsNewAddress bool, connectivityError error)
}
|
|
|
|
|
2022-12-14 11:43:52 +00:00
|
|
|
func NewIPAddrFallback(maxRetries uint8) *ipAddrFallback {
|
|
|
|
return &ipAddrFallback{
|
|
|
|
retriesByConnIndex: make(map[uint8]uint8),
|
|
|
|
maxRetries: maxRetries,
|
2022-05-20 21:51:36 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-14 11:43:52 +00:00
|
|
|
// ipAddrFallback is an EdgeAddrHandler that asks for a new edge address on
// duplicate connection registrations, QUIC idle timeouts and edge dial errors.
// Dial errors are additionally reported as connectivity errors and counted per
// connection index.
type ipAddrFallback struct {
	// m guards retriesByConnIndex.
	m sync.Mutex
	// retriesByConnIndex counts consecutive dial failures per connection index.
	retriesByConnIndex map[uint8]uint8
	// maxRetries is the count at which a connection index is reported as
	// having reached its maximum retries.
	maxRetries uint8
}
|
2022-05-20 21:51:36 +00:00
|
|
|
|
2022-12-14 11:43:52 +00:00
|
|
|
func (f *ipAddrFallback) ShouldGetNewAddress(connIndex uint8, err error) (needsNewAddress bool, connectivityError error) {
|
|
|
|
f.m.Lock()
|
|
|
|
defer f.m.Unlock()
|
2022-05-20 21:51:36 +00:00
|
|
|
switch err.(type) {
|
|
|
|
case nil: // maintain current IP address
|
|
|
|
// Try the next address if it was a quic.IdleTimeoutError
|
|
|
|
// DupConnRegisterTunnelError needs to also receive a new ip address
|
|
|
|
case connection.DupConnRegisterTunnelError,
|
|
|
|
*quic.IdleTimeoutError:
|
2022-12-14 11:43:52 +00:00
|
|
|
return true, nil
|
2022-05-20 21:51:36 +00:00
|
|
|
// Network problems should be retried with new address immediately and report
|
|
|
|
// as connectivity error
|
|
|
|
case edgediscovery.DialError, *connection.EdgeQuicDialError:
|
2022-12-14 11:43:52 +00:00
|
|
|
if f.retriesByConnIndex[connIndex] >= f.maxRetries {
|
|
|
|
f.retriesByConnIndex[connIndex] = 0
|
|
|
|
return true, NewConnectivityError(true)
|
|
|
|
}
|
|
|
|
f.retriesByConnIndex[connIndex]++
|
|
|
|
return true, NewConnectivityError(false)
|
2022-05-20 21:51:36 +00:00
|
|
|
default: // maintain current IP address
|
|
|
|
}
|
2022-12-14 11:43:52 +00:00
|
|
|
return false, nil
|
2022-05-20 21:51:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
type EdgeTunnelServer struct {
|
|
|
|
config *TunnelConfig
|
|
|
|
orchestrator *orchestration.Orchestrator
|
|
|
|
credentialManager *reconnectCredentialManager
|
|
|
|
edgeAddrHandler EdgeAddrHandler
|
|
|
|
edgeAddrs *edgediscovery.Edge
|
2023-02-28 16:11:42 +00:00
|
|
|
edgeBindAddr net.IP
|
2022-05-20 21:51:36 +00:00
|
|
|
reconnectCh chan ReconnectSignal
|
|
|
|
gracefulShutdownC <-chan struct{}
|
2022-08-11 20:31:36 +00:00
|
|
|
tracker *tunnelstate.ConnTracker
|
2022-05-20 21:51:36 +00:00
|
|
|
|
|
|
|
connAwareLogger *ConnAwareLogger
|
|
|
|
}
|
|
|
|
|
2022-11-16 09:08:45 +00:00
|
|
|
type TunnelServer interface {
|
|
|
|
Serve(ctx context.Context, connIndex uint8, protocolFallback *protocolFallback, connectedSignal *signal.Signal) error
|
|
|
|
}
|
|
|
|
|
2022-08-18 15:03:47 +00:00
|
|
|
func (e *EdgeTunnelServer) Serve(ctx context.Context, connIndex uint8, protocolFallback *protocolFallback, connectedSignal *signal.Signal) error {
|
2020-10-08 10:12:26 +00:00
|
|
|
haConnections.Inc()
|
|
|
|
defer haConnections.Dec()
|
|
|
|
|
2018-05-01 23:45:06 +00:00
|
|
|
connectedFuse := h2mux.NewBooleanFuse()
|
|
|
|
go func() {
|
|
|
|
if connectedFuse.Await() {
|
2019-03-04 19:48:56 +00:00
|
|
|
connectedSignal.Notify()
|
2018-05-01 23:45:06 +00:00
|
|
|
}
|
|
|
|
}()
|
|
|
|
// Ensure the above goroutine will terminate if we return without connecting
|
|
|
|
defer connectedFuse.Fuse(false)
|
2022-05-20 21:51:36 +00:00
|
|
|
|
|
|
|
// Fetch IP address to associated connection index
|
|
|
|
addr, err := e.edgeAddrs.GetAddr(int(connIndex))
|
2022-06-18 00:24:37 +00:00
|
|
|
switch err.(type) {
|
2022-05-20 21:51:36 +00:00
|
|
|
case nil: // no error
|
|
|
|
case edgediscovery.ErrNoAddressesLeft:
|
|
|
|
return err
|
|
|
|
default:
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
logger := e.config.Log.With().
|
2023-04-12 21:41:11 +00:00
|
|
|
Int(management.EventTypeKey, int(management.Cloudflared)).
|
2022-05-20 21:51:36 +00:00
|
|
|
IPAddr(connection.LogFieldIPAddress, addr.UDP.IP).
|
|
|
|
Uint8(connection.LogFieldConnIndex, connIndex).
|
|
|
|
Logger()
|
|
|
|
connLog := e.connAwareLogger.ReplaceLogger(&logger)
|
2022-12-14 11:43:52 +00:00
|
|
|
|
2020-10-14 13:42:00 +00:00
|
|
|
// Each connection to keep its own copy of protocol, because individual connections might fallback
|
|
|
|
// to another protocol when a particular metal doesn't support new protocol
|
2022-05-20 21:51:36 +00:00
|
|
|
// Each connection can also have it's own IP version because individual connections might fallback
|
|
|
|
// to another IP version.
|
2022-12-14 11:43:52 +00:00
|
|
|
err, shouldFallbackProtocol := e.serveTunnel(
|
2022-05-20 21:51:36 +00:00
|
|
|
ctx,
|
|
|
|
connLog,
|
|
|
|
addr,
|
|
|
|
connIndex,
|
|
|
|
connectedFuse,
|
|
|
|
protocolFallback,
|
|
|
|
protocolFallback.protocol,
|
|
|
|
)
|
2021-02-05 00:07:49 +00:00
|
|
|
|
2022-12-14 11:43:52 +00:00
|
|
|
// Check if the connection error was from an IP issue with the host or
|
|
|
|
// establishing a connection to the edge and if so, rotate the IP address.
|
|
|
|
shouldRotateEdgeIP, cErr := e.edgeAddrHandler.ShouldGetNewAddress(connIndex, err)
|
|
|
|
if shouldRotateEdgeIP {
|
|
|
|
// rotate IP, but forcing internal state to assign a new IP to connection index.
|
|
|
|
if _, err := e.edgeAddrs.GetDifferentAddr(int(connIndex), true); err != nil {
|
2022-05-20 21:51:36 +00:00
|
|
|
return err
|
2020-10-14 13:42:00 +00:00
|
|
|
}
|
2022-10-12 10:55:41 +00:00
|
|
|
|
2022-12-14 11:43:52 +00:00
|
|
|
// In addition, if it is a connectivity error, and we have exhausted the configurable maximum edge IPs to rotate,
|
|
|
|
// then just fallback protocol on next iteration run.
|
|
|
|
connectivityErr, ok := cErr.(*ConnectivityError)
|
|
|
|
if ok {
|
|
|
|
shouldFallbackProtocol = connectivityErr.HasReachedMaxRetries()
|
|
|
|
}
|
2022-05-20 21:51:36 +00:00
|
|
|
}
|
2022-04-01 14:58:51 +00:00
|
|
|
|
2022-12-14 11:43:52 +00:00
|
|
|
// set connection has re-connecting and log the next retrying backoff
|
|
|
|
duration, ok := protocolFallback.GetMaxBackoffDuration(ctx)
|
|
|
|
if !ok {
|
|
|
|
return err
|
2022-05-20 21:51:36 +00:00
|
|
|
}
|
2022-12-14 11:43:52 +00:00
|
|
|
e.config.Observer.SendReconnect(connIndex)
|
|
|
|
connLog.Logger().Info().Msgf("Retrying connection in up to %s", duration)
|
2022-05-20 21:51:36 +00:00
|
|
|
|
2022-05-20 21:51:36 +00:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
case <-e.gracefulShutdownC:
|
|
|
|
return nil
|
|
|
|
case <-protocolFallback.BackoffTimer():
|
2022-12-14 11:43:52 +00:00
|
|
|
// should we fallback protocol? If not, just return. Otherwise, set new protocol for next method call.
|
|
|
|
if !shouldFallbackProtocol {
|
2022-05-20 21:51:36 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2022-08-11 20:31:36 +00:00
|
|
|
// If a single connection has connected with the current protocol, we know we know we don't have to fallback
|
|
|
|
// to a different protocol.
|
|
|
|
if e.tracker.HasConnectedWith(e.config.ProtocolSelector.Current()) {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2022-05-20 21:51:36 +00:00
|
|
|
if !selectNextProtocol(
|
|
|
|
connLog.Logger(),
|
|
|
|
protocolFallback,
|
|
|
|
e.config.ProtocolSelector,
|
|
|
|
err,
|
|
|
|
) {
|
|
|
|
return err
|
2021-02-05 00:07:49 +00:00
|
|
|
}
|
2020-10-14 13:42:00 +00:00
|
|
|
}
|
2022-05-20 21:51:36 +00:00
|
|
|
|
|
|
|
return err
|
2020-10-14 13:42:00 +00:00
|
|
|
}
|
|
|
|
|
2021-02-05 00:07:49 +00:00
|
|
|
// protocolFallback is a wrapper around backoffHandler that will try fallback option when backoff reaches
|
2020-10-14 13:42:00 +00:00
|
|
|
// max retries
|
2021-02-05 00:07:49 +00:00
|
|
|
type protocolFallback struct {
|
2021-03-26 04:04:56 +00:00
|
|
|
retry.BackoffHandler
|
2020-10-14 13:42:00 +00:00
|
|
|
protocol connection.Protocol
|
|
|
|
inFallback bool
|
|
|
|
}
|
|
|
|
|
2021-02-05 00:07:49 +00:00
|
|
|
func (pf *protocolFallback) reset() {
|
2021-03-26 04:04:56 +00:00
|
|
|
pf.ResetNow()
|
2020-10-14 13:42:00 +00:00
|
|
|
pf.inFallback = false
|
|
|
|
}
|
|
|
|
|
2021-02-05 00:07:49 +00:00
|
|
|
func (pf *protocolFallback) fallback(fallback connection.Protocol) {
|
2021-03-26 04:04:56 +00:00
|
|
|
pf.ResetNow()
|
2020-10-14 13:42:00 +00:00
|
|
|
pf.protocol = fallback
|
|
|
|
pf.inFallback = true
|
|
|
|
}
|
|
|
|
|
2021-02-05 00:07:49 +00:00
|
|
|
// selectNextProtocol picks connection protocol for the next retry iteration,
|
|
|
|
// returns true if it was able to pick the protocol, false if we are out of options and should stop retrying
|
|
|
|
func selectNextProtocol(
|
|
|
|
connLog *zerolog.Logger,
|
|
|
|
protocolBackoff *protocolFallback,
|
|
|
|
selector connection.ProtocolSelector,
|
TUN-5719: Re-attempt connection to edge with QUIC despite network error when there is no fallback
We have made 2 changes in the past that caused an unexpected edge case:
1. when faced with QUIC "no network activity", give up re-attempts and fall-back
2. when a protocol is chosen explicitly, rather than using auto (the default), do not fallback
The reasoning for 1. was to fallback quickly in situations where the user may not
have chosen QUIC, and simply got it because we auto-chose it (with the TXT DNS record),
but the users' environment does not allow egress via UDP.
The reasoning for 2. was to avoid falling back if the user explicitly chooses a
protocol. E.g., if the user chooses QUIC, she may want to do UDP proxying, so if
we fallback to HTTP2 protocol that will be unexpected since it does not support
UDP (and same applies for HTTP2 falling back to h2mux and TCP proxying).
This PR fixes the edge case that happens when both those changes 1. and 2. are
put together: when faced with a QUIC "no network activity", we should only try
to fallback if there is a possible fallback. Otherwise, we should exhaust the
retries as normal.
2022-01-27 22:12:25 +00:00
|
|
|
cause error,
|
2021-02-05 00:07:49 +00:00
|
|
|
) bool {
|
2022-09-06 12:20:50 +00:00
|
|
|
isQuicBroken := isQuicBroken(cause)
|
TUN-5719: Re-attempt connection to edge with QUIC despite network error when there is no fallback
We have made 2 changes in the past that caused an unexpected edge case:
1. when faced with QUIC "no network activity", give up re-attempts and fall-back
2. when a protocol is chosen explicitly, rather than using auto (the default), do not fallback
The reasoning for 1. was to fallback quickly in situations where the user may not
have chosen QUIC, and simply got it because we auto-chose it (with the TXT DNS record),
but the users' environment does not allow egress via UDP.
The reasoning for 2. was to avoid falling back if the user explicitly chooses a
protocol. E.g., if the user chooses QUIC, she may want to do UDP proxying, so if
we fallback to HTTP2 protocol that will be unexpected since it does not support
UDP (and same applies for HTTP2 falling back to h2mux and TCP proxying).
This PR fixes the edge case that happens when both those changes 1. and 2. are
put together: when faced with a QUIC "no network activity", we should only try
to fallback if there is a possible fallback. Otherwise, we should exhaust the
retries as normal.
2022-01-27 22:12:25 +00:00
|
|
|
_, hasFallback := selector.Fallback()
|
|
|
|
|
2022-09-06 12:20:50 +00:00
|
|
|
if protocolBackoff.ReachedMaxRetries() || (hasFallback && isQuicBroken) {
|
|
|
|
if isQuicBroken {
|
2022-04-11 21:50:37 +00:00
|
|
|
connLog.Warn().Msg("If this log occurs persistently, and cloudflared is unable to connect to " +
|
|
|
|
"Cloudflare Network with `quic` protocol, then most likely your machine/network is getting its egress " +
|
|
|
|
"UDP to port 7844 (or others) blocked or dropped. Make sure to allow egress connectivity as per " +
|
|
|
|
"https://developers.cloudflare.com/cloudflare-one/connections/connect-apps/configuration/ports-and-ips/\n" +
|
2023-09-05 14:58:19 +00:00
|
|
|
"If you are using private routing to this Tunnel, then ICMP, UDP (and Private DNS Resolution) will not work " +
|
2022-04-11 21:50:37 +00:00
|
|
|
"unless your cloudflared can connect with Cloudflare Network with `quic`.")
|
|
|
|
}
|
|
|
|
|
2021-02-05 00:07:49 +00:00
|
|
|
fallback, hasFallback := selector.Fallback()
|
2020-10-14 13:42:00 +00:00
|
|
|
if !hasFallback {
|
2021-02-05 00:07:49 +00:00
|
|
|
return false
|
2020-10-14 13:42:00 +00:00
|
|
|
}
|
|
|
|
// Already using fallback protocol, no point to retry
|
2021-02-05 00:07:49 +00:00
|
|
|
if protocolBackoff.protocol == fallback {
|
|
|
|
return false
|
2020-10-14 13:42:00 +00:00
|
|
|
}
|
2021-02-05 00:07:49 +00:00
|
|
|
connLog.Info().Msgf("Switching to fallback protocol %s", fallback)
|
|
|
|
protocolBackoff.fallback(fallback)
|
|
|
|
} else if !protocolBackoff.inFallback {
|
|
|
|
current := selector.Current()
|
|
|
|
if protocolBackoff.protocol != current {
|
|
|
|
protocolBackoff.protocol = current
|
|
|
|
connLog.Info().Msgf("Changing protocol to %s", current)
|
2020-10-14 13:42:00 +00:00
|
|
|
}
|
|
|
|
}
|
2021-02-05 00:07:49 +00:00
|
|
|
return true
|
2018-05-01 23:45:06 +00:00
|
|
|
}
|
|
|
|
|
2022-09-06 12:20:50 +00:00
|
|
|
func isQuicBroken(cause error) bool {
|
|
|
|
var idleTimeoutError *quic.IdleTimeoutError
|
|
|
|
if errors.As(cause, &idleTimeoutError) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
var transportError *quic.TransportError
|
|
|
|
if errors.As(cause, &transportError) && strings.Contains(cause.Error(), "operation not permitted") {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2021-01-27 13:19:37 +00:00
|
|
|
// ServeTunnel runs a single tunnel connection, returns nil on graceful shutdown,
|
|
|
|
// on error returns a flag indicating if error can be retried
|
2022-08-18 15:03:47 +00:00
|
|
|
func (e *EdgeTunnelServer) serveTunnel(
|
2018-05-01 23:45:06 +00:00
|
|
|
ctx context.Context,
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog *ConnAwareLogger,
|
2021-08-06 13:31:22 +00:00
|
|
|
addr *allregions.EdgeAddr,
|
2020-10-14 13:42:00 +00:00
|
|
|
connIndex uint8,
|
2020-10-08 10:12:26 +00:00
|
|
|
fuse *h2mux.BooleanFuse,
|
2021-02-05 00:07:49 +00:00
|
|
|
backoff *protocolFallback,
|
2020-10-14 13:42:00 +00:00
|
|
|
protocol connection.Protocol,
|
2018-05-01 23:45:06 +00:00
|
|
|
) (err error, recoverable bool) {
|
|
|
|
// Treat panics as recoverable errors
|
|
|
|
defer func() {
|
|
|
|
if r := recover(); r != nil {
|
|
|
|
var ok bool
|
|
|
|
err, ok = r.(error)
|
|
|
|
if !ok {
|
|
|
|
err = fmt.Errorf("ServeTunnel: %v", r)
|
|
|
|
}
|
2020-10-08 10:12:26 +00:00
|
|
|
err = errors.Wrapf(err, "stack trace: %s", string(debug.Stack()))
|
2018-05-01 23:45:06 +00:00
|
|
|
recoverable = true
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2022-08-18 15:03:47 +00:00
|
|
|
defer e.config.Observer.SendDisconnect(connIndex)
|
|
|
|
err, recoverable = e.serveConnection(
|
2021-09-21 06:11:36 +00:00
|
|
|
ctx,
|
|
|
|
connLog,
|
|
|
|
addr,
|
|
|
|
connIndex,
|
|
|
|
fuse,
|
|
|
|
backoff,
|
|
|
|
protocol,
|
|
|
|
)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
switch err := err.(type) {
|
|
|
|
case connection.DupConnRegisterTunnelError:
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.ConnAwareLogger().Err(err).Msg("Unable to establish connection.")
|
2021-09-21 06:11:36 +00:00
|
|
|
// don't retry this connection anymore, let supervisor pick a new address
|
|
|
|
return err, false
|
|
|
|
case connection.ServerRegisterTunnelError:
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.ConnAwareLogger().Err(err).Msg("Register tunnel error from server side")
|
2021-09-21 06:11:36 +00:00
|
|
|
// Don't send registration error return from server to Sentry. They are
|
|
|
|
// logged on server side
|
|
|
|
return err.Cause, !err.Permanent
|
2022-05-20 21:51:36 +00:00
|
|
|
case *connection.EdgeQuicDialError:
|
2022-12-14 11:43:52 +00:00
|
|
|
return err, false
|
2021-09-21 06:11:36 +00:00
|
|
|
case ReconnectSignal:
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.Logger().Info().
|
2022-05-20 21:51:36 +00:00
|
|
|
IPAddr(connection.LogFieldIPAddress, addr.UDP.IP).
|
2021-09-21 06:11:36 +00:00
|
|
|
Uint8(connection.LogFieldConnIndex, connIndex).
|
|
|
|
Msgf("Restarting connection due to reconnect signal in %s", err.Delay)
|
|
|
|
err.DelayBeforeReconnect()
|
|
|
|
return err, true
|
|
|
|
default:
|
|
|
|
if err == context.Canceled {
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.Logger().Debug().Err(err).Msgf("Serve tunnel error")
|
2021-09-21 06:11:36 +00:00
|
|
|
return err, false
|
|
|
|
}
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.ConnAwareLogger().Err(err).Msgf("Serve tunnel error")
|
2021-09-21 06:11:36 +00:00
|
|
|
_, permanent := err.(unrecoverableError)
|
|
|
|
return err, !permanent
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
|
2022-08-18 15:03:47 +00:00
|
|
|
func (e *EdgeTunnelServer) serveConnection(
|
2021-09-21 06:11:36 +00:00
|
|
|
ctx context.Context,
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog *ConnAwareLogger,
|
2021-09-21 06:11:36 +00:00
|
|
|
addr *allregions.EdgeAddr,
|
|
|
|
connIndex uint8,
|
|
|
|
fuse *h2mux.BooleanFuse,
|
|
|
|
backoff *protocolFallback,
|
|
|
|
protocol connection.Protocol,
|
|
|
|
) (err error, recoverable bool) {
|
2021-08-17 14:30:02 +00:00
|
|
|
connectedFuse := &connectedFuse{
|
|
|
|
fuse: fuse,
|
|
|
|
backoff: backoff,
|
|
|
|
}
|
|
|
|
controlStream := connection.NewControlStream(
|
2022-08-18 15:03:47 +00:00
|
|
|
e.config.Observer,
|
2021-08-17 14:30:02 +00:00
|
|
|
connectedFuse,
|
2022-08-18 15:03:47 +00:00
|
|
|
e.config.NamedTunnel,
|
2021-08-17 14:30:02 +00:00
|
|
|
connIndex,
|
2022-06-18 00:24:37 +00:00
|
|
|
addr.UDP.IP,
|
2021-08-17 14:30:02 +00:00
|
|
|
nil,
|
2022-08-18 15:03:47 +00:00
|
|
|
e.gracefulShutdownC,
|
|
|
|
e.config.GracePeriod,
|
2022-08-11 20:31:36 +00:00
|
|
|
protocol,
|
2021-08-17 14:30:02 +00:00
|
|
|
)
|
|
|
|
|
2021-09-21 06:11:36 +00:00
|
|
|
switch protocol {
|
2023-02-06 19:06:02 +00:00
|
|
|
case connection.QUIC:
|
2022-08-18 15:03:47 +00:00
|
|
|
connOptions := e.config.connectionOptions(addr.UDP.String(), uint8(backoff.Retries()))
|
|
|
|
return e.serveQUIC(ctx,
|
2021-08-17 14:30:02 +00:00
|
|
|
addr.UDP,
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog,
|
2021-08-17 14:30:02 +00:00
|
|
|
connOptions,
|
|
|
|
controlStream,
|
2022-08-18 15:03:47 +00:00
|
|
|
connIndex)
|
2021-08-17 14:30:02 +00:00
|
|
|
|
2023-02-06 19:06:02 +00:00
|
|
|
case connection.HTTP2:
|
2023-02-28 16:11:42 +00:00
|
|
|
edgeConn, err := edgediscovery.DialEdge(ctx, dialTimeout, e.config.EdgeTLSConfigs[protocol], addr.TCP, e.edgeBindAddr)
|
2021-09-21 06:11:36 +00:00
|
|
|
if err != nil {
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.ConnAwareLogger().Err(err).Msg("Unable to establish connection with Cloudflare edge")
|
2021-09-21 06:11:36 +00:00
|
|
|
return err, true
|
|
|
|
}
|
2021-01-27 13:19:37 +00:00
|
|
|
|
2022-08-18 15:03:47 +00:00
|
|
|
connOptions := e.config.connectionOptions(edgeConn.LocalAddr().String(), uint8(backoff.Retries()))
|
|
|
|
if err := e.serveHTTP2(
|
2021-01-20 19:41:09 +00:00
|
|
|
ctx,
|
2021-02-22 15:30:27 +00:00
|
|
|
connLog,
|
2021-01-20 19:41:09 +00:00
|
|
|
edgeConn,
|
|
|
|
connOptions,
|
2021-08-17 14:30:02 +00:00
|
|
|
controlStream,
|
2021-01-20 19:41:09 +00:00
|
|
|
connIndex,
|
2021-09-21 06:11:36 +00:00
|
|
|
); err != nil {
|
|
|
|
return err, false
|
|
|
|
}
|
|
|
|
|
|
|
|
default:
|
2023-03-07 21:51:37 +00:00
|
|
|
return fmt.Errorf("invalid protocol selected: %s", protocol), false
|
2021-01-27 13:19:37 +00:00
|
|
|
}
|
2021-09-21 06:11:36 +00:00
|
|
|
return
|
2021-01-27 13:19:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// unrecoverableError wraps an error to mark it as permanent: serveTunnel
// reports errors of this type as not recoverable.
type unrecoverableError struct {
	err error
}

// Error returns the message of the wrapped error.
func (r unrecoverableError) Error() string {
	wrapped := r.err
	return wrapped.Error()
}
|
2020-10-08 09:48:10 +00:00
|
|
|
|
2022-08-18 15:03:47 +00:00
|
|
|
func (e *EdgeTunnelServer) serveHTTP2(
|
2020-09-11 22:02:34 +00:00
|
|
|
ctx context.Context,
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog *ConnAwareLogger,
|
2020-10-08 10:12:26 +00:00
|
|
|
tlsServerConn net.Conn,
|
|
|
|
connOptions *tunnelpogs.ConnectionOptions,
|
2021-08-17 14:30:02 +00:00
|
|
|
controlStreamHandler connection.ControlStreamHandler,
|
2020-09-25 13:12:53 +00:00
|
|
|
connIndex uint8,
|
2021-01-27 13:19:37 +00:00
|
|
|
) error {
|
2023-08-25 13:39:25 +00:00
|
|
|
pqMode := e.config.FeatureSelector.PostQuantumMode()
|
|
|
|
if pqMode == features.PostQuantumStrict {
|
2022-08-24 12:33:10 +00:00
|
|
|
return unrecoverableError{errors.New("HTTP/2 transport does not support post-quantum")}
|
|
|
|
}
|
|
|
|
|
2021-11-08 15:43:36 +00:00
|
|
|
connLog.Logger().Debug().Msgf("Connecting via http2")
|
2021-01-20 19:41:09 +00:00
|
|
|
h2conn := connection.NewHTTP2Connection(
|
2020-11-25 06:55:13 +00:00
|
|
|
tlsServerConn,
|
2022-08-18 15:03:47 +00:00
|
|
|
e.orchestrator,
|
2020-11-25 06:55:13 +00:00
|
|
|
connOptions,
|
2022-08-18 15:03:47 +00:00
|
|
|
e.config.Observer,
|
2020-11-25 06:55:13 +00:00
|
|
|
connIndex,
|
2021-08-17 14:30:02 +00:00
|
|
|
controlStreamHandler,
|
2022-08-18 15:03:47 +00:00
|
|
|
e.config.Log,
|
2020-11-25 06:55:13 +00:00
|
|
|
)
|
2020-09-11 22:02:34 +00:00
|
|
|
|
2020-09-25 13:12:53 +00:00
|
|
|
errGroup, serveCtx := errgroup.WithContext(ctx)
|
|
|
|
errGroup.Go(func() error {
|
2021-01-20 19:41:09 +00:00
|
|
|
return h2conn.Serve(serveCtx)
|
2020-09-25 13:12:53 +00:00
|
|
|
})
|
|
|
|
|
2021-01-27 13:19:37 +00:00
|
|
|
errGroup.Go(func() error {
|
2022-08-18 15:03:47 +00:00
|
|
|
err := listenReconnect(serveCtx, e.reconnectCh, e.gracefulShutdownC)
|
2021-01-27 13:19:37 +00:00
|
|
|
if err != nil {
|
|
|
|
// forcefully break the connection (this is only used for testing)
|
2022-08-31 19:52:44 +00:00
|
|
|
// errgroup will return context canceled for the h2conn.Serve
|
2022-05-20 21:51:36 +00:00
|
|
|
connLog.Logger().Debug().Msg("Forcefully breaking http2 connection")
|
2021-01-27 13:19:37 +00:00
|
|
|
}
|
|
|
|
return err
|
|
|
|
})
|
2020-09-11 22:02:34 +00:00
|
|
|
|
2021-01-27 13:19:37 +00:00
|
|
|
return errGroup.Wait()
|
2020-09-11 22:02:34 +00:00
|
|
|
}
|
|
|
|
|
2022-08-18 15:03:47 +00:00
|
|
|
func (e *EdgeTunnelServer) serveQUIC(
|
2021-08-17 14:30:02 +00:00
|
|
|
ctx context.Context,
|
|
|
|
edgeAddr *net.UDPAddr,
|
2021-11-08 15:43:36 +00:00
|
|
|
connLogger *ConnAwareLogger,
|
2021-08-17 14:30:02 +00:00
|
|
|
connOptions *tunnelpogs.ConnectionOptions,
|
|
|
|
controlStreamHandler connection.ControlStreamHandler,
|
2021-10-15 11:05:54 +00:00
|
|
|
connIndex uint8,
|
2021-08-17 14:30:02 +00:00
|
|
|
) (err error, recoverable bool) {
|
2022-08-18 15:03:47 +00:00
|
|
|
tlsConfig := e.config.EdgeTLSConfigs[connection.QUIC]
|
2022-08-24 12:33:10 +00:00
|
|
|
|
2023-08-25 13:39:25 +00:00
|
|
|
pqMode := e.config.FeatureSelector.PostQuantumMode()
|
|
|
|
if pqMode == features.PostQuantumStrict || pqMode == features.PostQuantumPrefer {
|
|
|
|
connOptions.Client.Features = features.Dedup(append(connOptions.Client.Features, features.FeaturePostQuantum))
|
2022-08-24 12:33:10 +00:00
|
|
|
}
|
|
|
|
|
2023-08-25 13:39:25 +00:00
|
|
|
curvePref, err := curvePreference(pqMode, tlsConfig.CurvePreferences)
|
|
|
|
if err != nil {
|
|
|
|
return err, true
|
|
|
|
}
|
|
|
|
|
|
|
|
tlsConfig.CurvePreferences = curvePref
|
|
|
|
|
2021-08-17 14:30:02 +00:00
|
|
|
quicConfig := &quic.Config{
|
2023-07-12 09:37:19 +00:00
|
|
|
HandshakeIdleTimeout: quicpogs.HandshakeIdleTimeout,
|
|
|
|
MaxIdleTimeout: quicpogs.MaxIdleTimeout,
|
|
|
|
KeepAlivePeriod: quicpogs.MaxIdlePingPeriod,
|
|
|
|
MaxIncomingStreams: quicpogs.MaxIncomingStreams,
|
|
|
|
MaxIncomingUniStreams: quicpogs.MaxIncomingStreams,
|
|
|
|
EnableDatagrams: true,
|
|
|
|
Tracer: quicpogs.NewClientTracer(connLogger.Logger(), connIndex),
|
|
|
|
DisablePathMTUDiscovery: e.config.DisableQUICPathMTUDiscovery,
|
2021-08-17 14:30:02 +00:00
|
|
|
}
|
|
|
|
|
2022-01-05 16:01:56 +00:00
|
|
|
quicConn, err := connection.NewQUICConnection(
|
2023-05-06 00:42:41 +00:00
|
|
|
ctx,
|
2022-01-05 16:01:56 +00:00
|
|
|
quicConfig,
|
|
|
|
edgeAddr,
|
2023-02-28 16:11:42 +00:00
|
|
|
e.edgeBindAddr,
|
2022-10-12 16:01:25 +00:00
|
|
|
connIndex,
|
2022-01-05 16:01:56 +00:00
|
|
|
tlsConfig,
|
2022-08-18 15:03:47 +00:00
|
|
|
e.orchestrator,
|
2022-01-05 16:01:56 +00:00
|
|
|
connOptions,
|
|
|
|
controlStreamHandler,
|
2022-08-18 15:03:47 +00:00
|
|
|
connLogger.Logger(),
|
2023-06-19 16:03:11 +00:00
|
|
|
e.config.PacketConfig,
|
2024-05-14 04:22:06 +00:00
|
|
|
e.config.RPCTimeout,
|
2024-02-12 18:58:55 +00:00
|
|
|
e.config.WriteStreamTimeout,
|
2023-06-19 16:03:11 +00:00
|
|
|
)
|
2022-01-05 16:01:56 +00:00
|
|
|
if err != nil {
|
|
|
|
connLogger.ConnAwareLogger().Err(err).Msgf("Failed to create new quic connection")
|
|
|
|
return err, true
|
|
|
|
}
|
2021-08-17 14:30:02 +00:00
|
|
|
|
2022-01-05 16:01:56 +00:00
|
|
|
errGroup, serveCtx := errgroup.WithContext(ctx)
|
|
|
|
errGroup.Go(func() error {
|
|
|
|
err := quicConn.Serve(serveCtx)
|
|
|
|
if err != nil {
|
|
|
|
connLogger.ConnAwareLogger().Err(err).Msg("Failed to serve quic connection")
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
})
|
2021-08-17 14:30:02 +00:00
|
|
|
|
2022-01-05 16:01:56 +00:00
|
|
|
errGroup.Go(func() error {
|
2022-08-18 15:03:47 +00:00
|
|
|
err := listenReconnect(serveCtx, e.reconnectCh, e.gracefulShutdownC)
|
2022-01-05 16:01:56 +00:00
|
|
|
if err != nil {
|
|
|
|
// forcefully break the connection (this is only used for testing)
|
2022-08-31 19:52:44 +00:00
|
|
|
// errgroup will return context canceled for the quicConn.Serve
|
2022-05-20 21:51:36 +00:00
|
|
|
connLogger.Logger().Debug().Msg("Forcefully breaking quic connection")
|
2021-08-17 14:30:02 +00:00
|
|
|
}
|
2022-01-05 16:01:56 +00:00
|
|
|
return err
|
|
|
|
})
|
|
|
|
|
|
|
|
return errGroup.Wait(), false
|
2021-08-17 14:30:02 +00:00
|
|
|
}
|
|
|
|
|
2021-02-05 00:07:49 +00:00
|
|
|
func listenReconnect(ctx context.Context, reconnectCh <-chan ReconnectSignal, gracefulShutdownCh <-chan struct{}) error {
|
2021-01-27 13:19:37 +00:00
|
|
|
select {
|
|
|
|
case reconnect := <-reconnectCh:
|
|
|
|
return reconnect
|
|
|
|
case <-gracefulShutdownCh:
|
|
|
|
return nil
|
|
|
|
case <-ctx.Done():
|
|
|
|
return nil
|
2018-10-08 19:20:28 +00:00
|
|
|
}
|
2019-04-25 23:13:06 +00:00
|
|
|
}
|
|
|
|
|
2020-10-08 10:12:26 +00:00
|
|
|
type connectedFuse struct {
|
|
|
|
fuse *h2mux.BooleanFuse
|
2021-02-05 00:07:49 +00:00
|
|
|
backoff *protocolFallback
|
2018-05-01 23:45:06 +00:00
|
|
|
}
|
|
|
|
|
2020-10-08 10:12:26 +00:00
|
|
|
func (cf *connectedFuse) Connected() {
|
|
|
|
cf.fuse.Fuse(true)
|
2020-10-14 13:42:00 +00:00
|
|
|
cf.backoff.reset()
|
2018-10-08 19:20:28 +00:00
|
|
|
}
|
|
|
|
|
2020-10-08 10:12:26 +00:00
|
|
|
func (cf *connectedFuse) IsConnected() bool {
|
|
|
|
return cf.fuse.Value()
|
2018-10-08 19:20:28 +00:00
|
|
|
}
|