TUN-8709: Add session migration for datagram v3

When a registration response from cloudflared gets lost on it's way back to the edge, the edge service will retry and send another registration request. Since cloudflared already has bound the local UDP socket for the provided request id, we want to re-send the registration response.

There are three types of retries that the edge will send:

1. A retry from the same QUIC connection index; cloudflared will just respond back with a registration response and reset the idle timer for the session.
2. A retry from a different QUIC connection index; cloudflared will need to migrate the current session connection to this new QUIC connection and reset the idle timer for the session.
3. A retry to a different cloudflared connector; cloudflared will eventually time the session out since no further packets will arrive to the session at the original connector.

Closes TUN-8709
This commit is contained in:
Devin Carr 2024-11-06 12:06:07 -08:00
parent 70393b6de4
commit 952622a965
9 changed files with 362 additions and 78 deletions

View File

@ -24,9 +24,10 @@ type datagramV3Connection struct {
func NewDatagramV3Connection(ctx context.Context, func NewDatagramV3Connection(ctx context.Context,
conn quic.Connection, conn quic.Connection,
sessionManager cfdquic.SessionManager, sessionManager cfdquic.SessionManager,
index uint8,
logger *zerolog.Logger, logger *zerolog.Logger,
) DatagramSessionHandler { ) DatagramSessionHandler {
datagramMuxer := cfdquic.NewDatagramConn(conn, sessionManager, logger) datagramMuxer := cfdquic.NewDatagramConn(conn, sessionManager, index, logger)
return &datagramV3Connection{ return &datagramV3Connection{
conn, conn,

View File

@ -284,8 +284,6 @@ const (
ResponseDestinationUnreachable SessionRegistrationResp = 0x01 ResponseDestinationUnreachable SessionRegistrationResp = 0x01
// Session registration was unable to bind to a local UDP socket. // Session registration was unable to bind to a local UDP socket.
ResponseUnableToBindSocket SessionRegistrationResp = 0x02 ResponseUnableToBindSocket SessionRegistrationResp = 0x02
// Session registration is already bound to another connection.
ResponseSessionAlreadyConnected SessionRegistrationResp = 0x03
// Session registration failed with an unexpected error but provided a message. // Session registration failed with an unexpected error but provided a message.
ResponseErrorWithMsg SessionRegistrationResp = 0xff ResponseErrorWithMsg SessionRegistrationResp = 0xff
) )

View File

@ -12,15 +12,19 @@ import (
) )
var ( var (
// ErrSessionNotFound indicates that a session has not been registered yet for the request id.
ErrSessionNotFound = errors.New("session not found") ErrSessionNotFound = errors.New("session not found")
// ErrSessionBoundToOtherConn is returned when a registration already exists for a different connection.
ErrSessionBoundToOtherConn = errors.New("session is in use by another connection") ErrSessionBoundToOtherConn = errors.New("session is in use by another connection")
// ErrSessionAlreadyRegistered is returned when a registration already exists for this connection.
ErrSessionAlreadyRegistered = errors.New("session is already registered for this connection")
) )
type SessionManager interface { type SessionManager interface {
// RegisterSession will register a new session if it does not already exist for the request ID. // RegisterSession will register a new session if it does not already exist for the request ID.
// During new session creation, the session will also bind the UDP socket for the origin. // During new session creation, the session will also bind the UDP socket for the origin.
// If the session exists for a different connection, it will return [ErrSessionBoundToOtherConn]. // If the session exists for a different connection, it will return [ErrSessionBoundToOtherConn].
RegisterSession(request *UDPSessionRegistrationDatagram, conn DatagramWriter) (Session, error) RegisterSession(request *UDPSessionRegistrationDatagram, conn DatagramConn) (Session, error)
// GetSession returns an active session if available for the provided connection. // GetSession returns an active session if available for the provided connection.
// If the session does not exist, it will return [ErrSessionNotFound]. If the session exists for a different // If the session does not exist, it will return [ErrSessionNotFound]. If the session exists for a different
// connection, it will return [ErrSessionBoundToOtherConn]. // connection, it will return [ErrSessionBoundToOtherConn].
@ -45,12 +49,14 @@ func NewSessionManager(log *zerolog.Logger, originDialer DialUDP) SessionManager
} }
} }
func (s *sessionManager) RegisterSession(request *UDPSessionRegistrationDatagram, conn DatagramWriter) (Session, error) { func (s *sessionManager) RegisterSession(request *UDPSessionRegistrationDatagram, conn DatagramConn) (Session, error) {
s.mutex.Lock() s.mutex.Lock()
defer s.mutex.Unlock() defer s.mutex.Unlock()
// Check to make sure session doesn't already exist for requestID // Check to make sure session doesn't already exist for requestID
_, exists := s.sessions[request.RequestID] if session, exists := s.sessions[request.RequestID]; exists {
if exists { if conn.ID() == session.ConnectionID() {
return nil, ErrSessionAlreadyRegistered
}
return nil, ErrSessionBoundToOtherConn return nil, ErrSessionBoundToOtherConn
} }
// Attempt to bind the UDP socket for the new session // Attempt to bind the UDP socket for the new session

View File

@ -34,8 +34,14 @@ func TestRegisterSession(t *testing.T) {
// We shouldn't be able to register another session with the same request id // We shouldn't be able to register another session with the same request id
_, err = manager.RegisterSession(&request, &noopEyeball{}) _, err = manager.RegisterSession(&request, &noopEyeball{})
if !errors.Is(err, v3.ErrSessionAlreadyRegistered) {
t.Fatalf("session is already registered for this connection: %v", err)
}
// We shouldn't be able to register another session with the same request id for a different connection
_, err = manager.RegisterSession(&request, &noopEyeball{connID: 1})
if !errors.Is(err, v3.ErrSessionBoundToOtherConn) { if !errors.Is(err, v3.ErrSessionBoundToOtherConn) {
t.Fatalf("session should not be able to be registered again: %v", err) t.Fatalf("session is already registered for a separate connection: %v", err)
} }
// Get session // Get session

View File

@ -19,6 +19,8 @@ type DatagramConn interface {
DatagramWriter DatagramWriter
// Serve provides a server interface to process and handle incoming QUIC datagrams and demux their datagram v3 payloads. // Serve provides a server interface to process and handle incoming QUIC datagrams and demux their datagram v3 payloads.
Serve(context.Context) error Serve(context.Context) error
// ID indicates connection index identifier
ID() uint8
} }
// DatagramWriter provides the Muxer interface to create proper Datagrams when sending over a connection. // DatagramWriter provides the Muxer interface to create proper Datagrams when sending over a connection.
@ -41,6 +43,7 @@ type QuicConnection interface {
type datagramConn struct { type datagramConn struct {
conn QuicConnection conn QuicConnection
index uint8
sessionManager SessionManager sessionManager SessionManager
logger *zerolog.Logger logger *zerolog.Logger
@ -48,10 +51,11 @@ type datagramConn struct {
readErrors chan error readErrors chan error
} }
func NewDatagramConn(conn QuicConnection, sessionManager SessionManager, logger *zerolog.Logger) DatagramConn { func NewDatagramConn(conn QuicConnection, sessionManager SessionManager, index uint8, logger *zerolog.Logger) DatagramConn {
log := logger.With().Uint8("datagramVersion", 3).Logger() log := logger.With().Uint8("datagramVersion", 3).Logger()
return &datagramConn{ return &datagramConn{
conn: conn, conn: conn,
index: index,
sessionManager: sessionManager, sessionManager: sessionManager,
logger: &log, logger: &log,
datagrams: make(chan []byte, demuxChanCapacity), datagrams: make(chan []byte, demuxChanCapacity),
@ -59,6 +63,10 @@ func NewDatagramConn(conn QuicConnection, sessionManager SessionManager, logger
} }
} }
func (c datagramConn) ID() uint8 {
return c.index
}
func (c *datagramConn) SendUDPSessionDatagram(datagram []byte) error { func (c *datagramConn) SendUDPSessionDatagram(datagram []byte) error {
return c.conn.SendDatagram(datagram) return c.conn.SendDatagram(datagram)
} }
@ -163,9 +171,20 @@ func (c *datagramConn) Serve(ctx context.Context) error {
// This method handles new registrations of a session and the serve loop for the session. // This method handles new registrations of a session and the serve loop for the session.
func (c *datagramConn) handleSessionRegistrationDatagram(ctx context.Context, datagram *UDPSessionRegistrationDatagram) { func (c *datagramConn) handleSessionRegistrationDatagram(ctx context.Context, datagram *UDPSessionRegistrationDatagram) {
session, err := c.sessionManager.RegisterSession(datagram, c) session, err := c.sessionManager.RegisterSession(datagram, c)
if err != nil { switch err {
case nil:
// Continue as normal
case ErrSessionAlreadyRegistered:
// Session is already registered and likely the response got lost
c.handleSessionAlreadyRegistered(datagram.RequestID)
return
case ErrSessionBoundToOtherConn:
// Session is already registered but to a different connection
c.handleSessionMigration(datagram.RequestID)
return
default:
c.logger.Err(err).Msgf("session registration failure") c.logger.Err(err).Msgf("session registration failure")
c.handleSessionRegistrationFailure(datagram.RequestID, err) c.handleSessionRegistrationFailure(datagram.RequestID)
return return
} }
// Make sure to eventually remove the session from the session manager when the session is closed // Make sure to eventually remove the session from the session manager when the session is closed
@ -197,17 +216,49 @@ func (c *datagramConn) handleSessionRegistrationDatagram(ctx context.Context, da
c.logger.Err(err).Msgf("session was closed with an error") c.logger.Err(err).Msgf("session was closed with an error")
} }
func (c *datagramConn) handleSessionRegistrationFailure(requestID RequestID, regErr error) { func (c *datagramConn) handleSessionAlreadyRegistered(requestID RequestID) {
var errResp SessionRegistrationResp // Send another registration response since the session is already active
switch regErr { err := c.SendUDPSessionResponse(requestID, ResponseOk)
case ErrSessionBoundToOtherConn:
errResp = ResponseSessionAlreadyConnected
default:
errResp = ResponseUnableToBindSocket
}
err := c.SendUDPSessionResponse(requestID, errResp)
if err != nil { if err != nil {
c.logger.Err(err).Msgf("unable to send session registration error response (%d)", errResp) c.logger.Err(err).Msgf("session registration failure: unable to send an additional session registration response")
return
}
session, err := c.sessionManager.GetSession(requestID)
if err != nil {
// If for some reason we can not find the session after attempting to register it, we can just return
// instead of trying to reset the idle timer for it.
return
}
// The session is already running in another routine so we want to restart the idle timeout since no proxied
// packets have come down yet.
session.ResetIdleTimer()
}
func (c *datagramConn) handleSessionMigration(requestID RequestID) {
// We need to migrate the currently running session to this edge connection.
session, err := c.sessionManager.GetSession(requestID)
if err != nil {
// If for some reason we can not find the session after attempting to register it, we can just return
// instead of trying to reset the idle timer for it.
return
}
// Migrate the session to use this edge connection instead of the currently running one.
session.Migrate(c)
// Send another registration response since the session is already active
err = c.SendUDPSessionResponse(requestID, ResponseOk)
if err != nil {
c.logger.Err(err).Msgf("session registration failure: unable to send an additional session registration response")
return
}
}
func (c *datagramConn) handleSessionRegistrationFailure(requestID RequestID) {
err := c.SendUDPSessionResponse(requestID, ResponseUnableToBindSocket)
if err != nil {
c.logger.Err(err).Msgf("unable to send session registration error response (%d)", ResponseUnableToBindSocket)
} }
} }

View File

@ -17,17 +17,19 @@ import (
v3 "github.com/cloudflare/cloudflared/quic/v3" v3 "github.com/cloudflare/cloudflared/quic/v3"
) )
type noopEyeball struct{} type noopEyeball struct {
connID uint8
func (noopEyeball) SendUDPSessionDatagram(datagram []byte) error {
return nil
} }
func (noopEyeball) Serve(ctx context.Context) error { return nil }
func (n noopEyeball) ID() uint8 { return n.connID }
func (noopEyeball) SendUDPSessionDatagram(datagram []byte) error { return nil }
func (noopEyeball) SendUDPSessionResponse(id v3.RequestID, resp v3.SessionRegistrationResp) error { func (noopEyeball) SendUDPSessionResponse(id v3.RequestID, resp v3.SessionRegistrationResp) error {
return nil return nil
} }
type mockEyeball struct { type mockEyeball struct {
connID uint8
// datagram sent via SendUDPSessionDatagram // datagram sent via SendUDPSessionDatagram
recvData chan []byte recvData chan []byte
// responses sent via SendUDPSessionResponse // responses sent via SendUDPSessionResponse
@ -39,6 +41,7 @@ type mockEyeball struct {
func newMockEyeball() mockEyeball { func newMockEyeball() mockEyeball {
return mockEyeball{ return mockEyeball{
connID: 0,
recvData: make(chan []byte, 1), recvData: make(chan []byte, 1),
recvResp: make(chan struct { recvResp: make(chan struct {
id v3.RequestID id v3.RequestID
@ -47,6 +50,9 @@ func newMockEyeball() mockEyeball {
} }
} }
func (mockEyeball) Serve(ctx context.Context) error { return nil }
func (m *mockEyeball) ID() uint8 { return m.connID }
func (m *mockEyeball) SendUDPSessionDatagram(datagram []byte) error { func (m *mockEyeball) SendUDPSessionDatagram(datagram []byte) error {
b := make([]byte, len(datagram)) b := make([]byte, len(datagram))
copy(b, datagram) copy(b, datagram)
@ -66,7 +72,7 @@ func (m *mockEyeball) SendUDPSessionResponse(id v3.RequestID, resp v3.SessionReg
func TestDatagramConn_New(t *testing.T) { func TestDatagramConn_New(t *testing.T) {
log := zerolog.Nop() log := zerolog.Nop()
conn := v3.NewDatagramConn(newMockQuicConn(), v3.NewSessionManager(&log, ingress.DialUDPAddrPort), &log) conn := v3.NewDatagramConn(newMockQuicConn(), v3.NewSessionManager(&log, ingress.DialUDPAddrPort), 0, &log)
if conn == nil { if conn == nil {
t.Fatal("expected valid connection") t.Fatal("expected valid connection")
} }
@ -75,7 +81,7 @@ func TestDatagramConn_New(t *testing.T) {
func TestDatagramConn_SendUDPSessionDatagram(t *testing.T) { func TestDatagramConn_SendUDPSessionDatagram(t *testing.T) {
log := zerolog.Nop() log := zerolog.Nop()
quic := newMockQuicConn() quic := newMockQuicConn()
conn := v3.NewDatagramConn(quic, v3.NewSessionManager(&log, ingress.DialUDPAddrPort), &log) conn := v3.NewDatagramConn(quic, v3.NewSessionManager(&log, ingress.DialUDPAddrPort), 0, &log)
payload := []byte{0xef, 0xef} payload := []byte{0xef, 0xef}
conn.SendUDPSessionDatagram(payload) conn.SendUDPSessionDatagram(payload)
@ -88,7 +94,7 @@ func TestDatagramConn_SendUDPSessionDatagram(t *testing.T) {
func TestDatagramConn_SendUDPSessionResponse(t *testing.T) { func TestDatagramConn_SendUDPSessionResponse(t *testing.T) {
log := zerolog.Nop() log := zerolog.Nop()
quic := newMockQuicConn() quic := newMockQuicConn()
conn := v3.NewDatagramConn(quic, v3.NewSessionManager(&log, ingress.DialUDPAddrPort), &log) conn := v3.NewDatagramConn(quic, v3.NewSessionManager(&log, ingress.DialUDPAddrPort), 0, &log)
conn.SendUDPSessionResponse(testRequestID, v3.ResponseDestinationUnreachable) conn.SendUDPSessionResponse(testRequestID, v3.ResponseDestinationUnreachable)
resp := <-quic.recv resp := <-quic.recv
@ -109,7 +115,7 @@ func TestDatagramConn_SendUDPSessionResponse(t *testing.T) {
func TestDatagramConnServe_ApplicationClosed(t *testing.T) { func TestDatagramConnServe_ApplicationClosed(t *testing.T) {
log := zerolog.Nop() log := zerolog.Nop()
quic := newMockQuicConn() quic := newMockQuicConn()
conn := v3.NewDatagramConn(quic, v3.NewSessionManager(&log, ingress.DialUDPAddrPort), &log) conn := v3.NewDatagramConn(quic, v3.NewSessionManager(&log, ingress.DialUDPAddrPort), 0, &log)
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
defer cancel() defer cancel()
@ -125,7 +131,7 @@ func TestDatagramConnServe_ConnectionClosed(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
defer cancel() defer cancel()
quic.ctx = ctx quic.ctx = ctx
conn := v3.NewDatagramConn(quic, v3.NewSessionManager(&log, ingress.DialUDPAddrPort), &log) conn := v3.NewDatagramConn(quic, v3.NewSessionManager(&log, ingress.DialUDPAddrPort), 0, &log)
err := conn.Serve(context.Background()) err := conn.Serve(context.Background())
if !errors.Is(err, context.DeadlineExceeded) { if !errors.Is(err, context.DeadlineExceeded) {
@ -136,7 +142,7 @@ func TestDatagramConnServe_ConnectionClosed(t *testing.T) {
func TestDatagramConnServe_ReceiveDatagramError(t *testing.T) { func TestDatagramConnServe_ReceiveDatagramError(t *testing.T) {
log := zerolog.Nop() log := zerolog.Nop()
quic := &mockQuicConnReadError{err: net.ErrClosed} quic := &mockQuicConnReadError{err: net.ErrClosed}
conn := v3.NewDatagramConn(quic, v3.NewSessionManager(&log, ingress.DialUDPAddrPort), &log) conn := v3.NewDatagramConn(quic, v3.NewSessionManager(&log, ingress.DialUDPAddrPort), 0, &log)
err := conn.Serve(context.Background()) err := conn.Serve(context.Background())
if !errors.Is(err, net.ErrClosed) { if !errors.Is(err, net.ErrClosed) {
@ -171,7 +177,7 @@ func TestDatagramConnServe_ErrorDatagramTypes(t *testing.T) {
log := zerolog.New(logOutput) log := zerolog.New(logOutput)
quic := newMockQuicConn() quic := newMockQuicConn()
quic.send <- test.input quic.send <- test.input
conn := v3.NewDatagramConn(quic, &mockSessionManager{}, &log) conn := v3.NewDatagramConn(quic, &mockSessionManager{}, 0, &log)
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
defer cancel() defer cancel()
@ -212,7 +218,7 @@ func TestDatagramConnServe_RegisterSession_SessionManagerError(t *testing.T) {
quic := newMockQuicConn() quic := newMockQuicConn()
expectedErr := errors.New("unable to register session") expectedErr := errors.New("unable to register session")
sessionManager := mockSessionManager{expectedRegErr: expectedErr} sessionManager := mockSessionManager{expectedRegErr: expectedErr}
conn := v3.NewDatagramConn(quic, &sessionManager, &log) conn := v3.NewDatagramConn(quic, &sessionManager, 0, &log)
// Setup the muxer // Setup the muxer
ctx, cancel := context.WithCancelCause(context.Background()) ctx, cancel := context.WithCancelCause(context.Background())
@ -234,19 +240,12 @@ func TestDatagramConnServe_RegisterSession_SessionManagerError(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
if resp.RequestID != testRequestID && resp.ResponseType != v3.ResponseUnableToBindSocket { if resp.RequestID != testRequestID || resp.ResponseType != v3.ResponseUnableToBindSocket {
t.Fatalf("expected registration response failure") t.Fatalf("expected registration response failure")
} }
// Cancel the muxer Serve context and make sure it closes with the expected error // Cancel the muxer Serve context and make sure it closes with the expected error
cancel(expectedContextCanceled) assertContextClosed(t, ctx, done, cancel)
err = <-done
if !errors.Is(err, context.Canceled) {
t.Fatal(err)
}
if !errors.Is(context.Cause(ctx), expectedContextCanceled) {
t.Fatal(err)
}
} }
func TestDatagramConnServe(t *testing.T) { func TestDatagramConnServe(t *testing.T) {
@ -254,7 +253,7 @@ func TestDatagramConnServe(t *testing.T) {
quic := newMockQuicConn() quic := newMockQuicConn()
session := newMockSession() session := newMockSession()
sessionManager := mockSessionManager{session: &session} sessionManager := mockSessionManager{session: &session}
conn := v3.NewDatagramConn(quic, &sessionManager, &log) conn := v3.NewDatagramConn(quic, &sessionManager, 0, &log)
// Setup the muxer // Setup the muxer
ctx, cancel := context.WithCancelCause(context.Background()) ctx, cancel := context.WithCancelCause(context.Background())
@ -276,7 +275,7 @@ func TestDatagramConnServe(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
if resp.RequestID != testRequestID && resp.ResponseType != v3.ResponseOk { if resp.RequestID != testRequestID || resp.ResponseType != v3.ResponseOk {
t.Fatalf("expected registration response ok") t.Fatalf("expected registration response ok")
} }
@ -291,21 +290,160 @@ func TestDatagramConnServe(t *testing.T) {
} }
// Cancel the muxer Serve context and make sure it closes with the expected error // Cancel the muxer Serve context and make sure it closes with the expected error
cancel(expectedContextCanceled) assertContextClosed(t, ctx, done, cancel)
err = <-done }
if !errors.Is(err, context.Canceled) {
func TestDatagramConnServe_RegisterTwice(t *testing.T) {
log := zerolog.Nop()
quic := newMockQuicConn()
session := newMockSession()
sessionManager := mockSessionManager{session: &session}
conn := v3.NewDatagramConn(quic, &sessionManager, 0, &log)
// Setup the muxer
ctx, cancel := context.WithCancelCause(context.Background())
defer cancel(errors.New("other error"))
done := make(chan error, 1)
go func() {
done <- conn.Serve(ctx)
}()
// Send new session registration
datagram := newRegisterSessionDatagram(testRequestID)
quic.send <- datagram
// Wait for session registration response with success
datagram = <-quic.recv
var resp v3.UDPSessionRegistrationResponseDatagram
err := resp.UnmarshalBinary(datagram)
if err != nil {
t.Fatal(err) t.Fatal(err)
} }
if !errors.Is(context.Cause(ctx), expectedContextCanceled) {
if resp.RequestID != testRequestID || resp.ResponseType != v3.ResponseOk {
t.Fatalf("expected registration response ok")
}
// Set the session manager to return already registered
sessionManager.expectedRegErr = v3.ErrSessionAlreadyRegistered
// Send the registration again as if we didn't receive it at the edge
datagram = newRegisterSessionDatagram(testRequestID)
quic.send <- datagram
// Wait for session registration response with success
datagram = <-quic.recv
err = resp.UnmarshalBinary(datagram)
if err != nil {
t.Fatal(err) t.Fatal(err)
} }
if resp.RequestID != testRequestID || resp.ResponseType != v3.ResponseOk {
t.Fatalf("expected registration response ok")
}
// We expect the session to be served
timer := time.NewTimer(15 * time.Second)
defer timer.Stop()
select {
case <-session.served:
break
case <-timer.C:
t.Fatalf("expected session serve to be called")
}
// Cancel the muxer Serve context and make sure it closes with the expected error
assertContextClosed(t, ctx, done, cancel)
}
func TestDatagramConnServe_MigrateConnection(t *testing.T) {
log := zerolog.Nop()
quic := newMockQuicConn()
session := newMockSession()
sessionManager := mockSessionManager{session: &session}
conn := v3.NewDatagramConn(quic, &sessionManager, 0, &log)
quic2 := newMockQuicConn()
conn2 := v3.NewDatagramConn(quic2, &sessionManager, 1, &log)
// Setup the muxer
ctx, cancel := context.WithCancelCause(context.Background())
defer cancel(errors.New("other error"))
done := make(chan error, 1)
go func() {
done <- conn.Serve(ctx)
}()
ctx2, cancel2 := context.WithCancelCause(context.Background())
defer cancel2(errors.New("other error"))
done2 := make(chan error, 1)
go func() {
done2 <- conn2.Serve(ctx2)
}()
// Send new session registration
datagram := newRegisterSessionDatagram(testRequestID)
quic.send <- datagram
// Wait for session registration response with success
datagram = <-quic.recv
var resp v3.UDPSessionRegistrationResponseDatagram
err := resp.UnmarshalBinary(datagram)
if err != nil {
t.Fatal(err)
}
if resp.RequestID != testRequestID || resp.ResponseType != v3.ResponseOk {
t.Fatalf("expected registration response ok")
}
// Set the session manager to return already registered to another connection
sessionManager.expectedRegErr = v3.ErrSessionBoundToOtherConn
// Send the registration again as if we didn't receive it at the edge for a new connection
datagram = newRegisterSessionDatagram(testRequestID)
quic2.send <- datagram
// Wait for session registration response with success
datagram = <-quic2.recv
err = resp.UnmarshalBinary(datagram)
if err != nil {
t.Fatal(err)
}
if resp.RequestID != testRequestID || resp.ResponseType != v3.ResponseOk {
t.Fatalf("expected registration response ok")
}
// We expect the session to be served
timer := time.NewTimer(15 * time.Second)
defer timer.Stop()
select {
case <-session.served:
break
case <-timer.C:
t.Fatalf("expected session serve to be called")
}
// Expect session to be migrated
select {
case id := <-session.migrated:
if id != conn2.ID() {
t.Fatalf("expected session to be migrated to connection 2")
}
case <-timer.C:
t.Fatalf("expected session migration to be called")
}
// Cancel the muxer Serve context and make sure it closes with the expected error
assertContextClosed(t, ctx, done, cancel)
// Cancel the second muxer Serve context and make sure it closes with the expected error
assertContextClosed(t, ctx2, done2, cancel2)
} }
func TestDatagramConnServe_Payload_GetSessionError(t *testing.T) { func TestDatagramConnServe_Payload_GetSessionError(t *testing.T) {
log := zerolog.Nop() log := zerolog.Nop()
quic := newMockQuicConn() quic := newMockQuicConn()
// mockSessionManager will return the ErrSessionNotFound for any session attempting to be queried by the muxer
sessionManager := mockSessionManager{session: nil, expectedGetErr: v3.ErrSessionNotFound} sessionManager := mockSessionManager{session: nil, expectedGetErr: v3.ErrSessionNotFound}
conn := v3.NewDatagramConn(quic, &sessionManager, &log) conn := v3.NewDatagramConn(quic, &sessionManager, 0, &log)
// Setup the muxer // Setup the muxer
ctx, cancel := context.WithCancelCause(context.Background()) ctx, cancel := context.WithCancelCause(context.Background())
@ -319,15 +457,13 @@ func TestDatagramConnServe_Payload_GetSessionError(t *testing.T) {
datagram := newSessionPayloadDatagram(testRequestID, []byte{0xef, 0xef}) datagram := newSessionPayloadDatagram(testRequestID, []byte{0xef, 0xef})
quic.send <- datagram quic.send <- datagram
// Since the muxer should eventually discard a failed registration request, there is no side-effect
// that the registration was failed beyond the muxer accepting the registration request. As such, the
// test can only ensure that the quic.send channel was consumed and that the muxer closes normally
// afterwards with the expected context cancelled trigger.
// Cancel the muxer Serve context and make sure it closes with the expected error // Cancel the muxer Serve context and make sure it closes with the expected error
cancel(expectedContextCanceled) assertContextClosed(t, ctx, done, cancel)
err := <-done
if !errors.Is(err, context.Canceled) {
t.Fatal(err)
}
if !errors.Is(context.Cause(ctx), expectedContextCanceled) {
t.Fatal(err)
}
} }
func TestDatagramConnServe_Payload(t *testing.T) { func TestDatagramConnServe_Payload(t *testing.T) {
@ -335,7 +471,7 @@ func TestDatagramConnServe_Payload(t *testing.T) {
quic := newMockQuicConn() quic := newMockQuicConn()
session := newMockSession() session := newMockSession()
sessionManager := mockSessionManager{session: &session} sessionManager := mockSessionManager{session: &session}
conn := v3.NewDatagramConn(quic, &sessionManager, &log) conn := v3.NewDatagramConn(quic, &sessionManager, 0, &log)
// Setup the muxer // Setup the muxer
ctx, cancel := context.WithCancelCause(context.Background()) ctx, cancel := context.WithCancelCause(context.Background())
@ -357,14 +493,7 @@ func TestDatagramConnServe_Payload(t *testing.T) {
} }
// Cancel the muxer Serve context and make sure it closes with the expected error // Cancel the muxer Serve context and make sure it closes with the expected error
cancel(expectedContextCanceled) assertContextClosed(t, ctx, done, cancel)
err := <-done
if !errors.Is(err, context.Canceled) {
t.Fatal(err)
}
if !errors.Is(context.Cause(ctx), expectedContextCanceled) {
t.Fatal(err)
}
} }
func newRegisterSessionDatagram(id v3.RequestID) []byte { func newRegisterSessionDatagram(id v3.RequestID) []byte {
@ -402,6 +531,18 @@ func newSessionPayloadDatagram(id v3.RequestID, payload []byte) []byte {
return datagram return datagram
} }
// Cancel the provided context and make sure it closes with the expected cancellation error
func assertContextClosed(t *testing.T, ctx context.Context, done <-chan error, cancel context.CancelCauseFunc) {
cancel(expectedContextCanceled)
err := <-done
if !errors.Is(err, context.Canceled) {
t.Fatal(err)
}
if !errors.Is(context.Cause(ctx), expectedContextCanceled) {
t.Fatal(err)
}
}
type mockQuicConn struct { type mockQuicConn struct {
ctx context.Context ctx context.Context
send chan []byte send chan []byte
@ -454,7 +595,7 @@ type mockSessionManager struct {
expectedGetErr error expectedGetErr error
} }
func (m *mockSessionManager) RegisterSession(request *v3.UDPSessionRegistrationDatagram, conn v3.DatagramWriter) (v3.Session, error) { func (m *mockSessionManager) RegisterSession(request *v3.UDPSessionRegistrationDatagram, conn v3.DatagramConn) (v3.Session, error) {
return m.session, m.expectedRegErr return m.session, m.expectedRegErr
} }
@ -466,12 +607,14 @@ func (m *mockSessionManager) UnregisterSession(requestID v3.RequestID) {}
type mockSession struct { type mockSession struct {
served chan struct{} served chan struct{}
migrated chan uint8
recv chan []byte recv chan []byte
} }
func newMockSession() mockSession { func newMockSession() mockSession {
return mockSession{ return mockSession{
served: make(chan struct{}), served: make(chan struct{}),
migrated: make(chan uint8, 2),
recv: make(chan []byte, 1), recv: make(chan []byte, 1),
} }
} }
@ -480,6 +623,13 @@ func (m *mockSession) ID() v3.RequestID {
return testRequestID return testRequestID
} }
func (m *mockSession) ConnectionID() uint8 {
return 0
}
func (m *mockSession) Migrate(conn v3.DatagramConn) { m.migrated <- conn.ID() }
func (m *mockSession) ResetIdleTimer() {}
func (m *mockSession) Serve(ctx context.Context) error { func (m *mockSession) Serve(ctx context.Context) error {
close(m.served) close(m.served)
return v3.SessionCloseErr return v3.SessionCloseErr

View File

@ -7,6 +7,7 @@ import (
"io" "io"
"net" "net"
"sync" "sync"
"sync/atomic"
"time" "time"
"github.com/rs/zerolog" "github.com/rs/zerolog"
@ -47,6 +48,9 @@ func newSessionIdleErr(timeout time.Duration) error {
type Session interface { type Session interface {
io.WriteCloser io.WriteCloser
ID() RequestID ID() RequestID
ConnectionID() uint8
ResetIdleTimer()
Migrate(eyeball DatagramConn)
// Serve starts the event loop for processing UDP packets // Serve starts the event loop for processing UDP packets
Serve(ctx context.Context) error Serve(ctx context.Context) error
} }
@ -55,31 +59,48 @@ type session struct {
id RequestID id RequestID
closeAfterIdle time.Duration closeAfterIdle time.Duration
origin io.ReadWriteCloser origin io.ReadWriteCloser
eyeball DatagramWriter eyeball atomic.Pointer[DatagramConn]
// activeAtChan is used to communicate the last read/write time // activeAtChan is used to communicate the last read/write time
activeAtChan chan time.Time activeAtChan chan time.Time
closeChan chan error closeChan chan error
log *zerolog.Logger log *zerolog.Logger
} }
func NewSession(id RequestID, closeAfterIdle time.Duration, origin io.ReadWriteCloser, eyeball DatagramWriter, log *zerolog.Logger) Session { func NewSession(id RequestID, closeAfterIdle time.Duration, origin io.ReadWriteCloser, eyeball DatagramConn, log *zerolog.Logger) Session {
return &session{ session := &session{
id: id, id: id,
closeAfterIdle: closeAfterIdle, closeAfterIdle: closeAfterIdle,
origin: origin, origin: origin,
eyeball: eyeball, eyeball: atomic.Pointer[DatagramConn]{},
// activeAtChan has low capacity. It can be full when there are many concurrent read/write. markActive() will // activeAtChan has low capacity. It can be full when there are many concurrent read/write. markActive() will
// drop instead of blocking because last active time only needs to be an approximation // drop instead of blocking because last active time only needs to be an approximation
activeAtChan: make(chan time.Time, 1), activeAtChan: make(chan time.Time, 1),
closeChan: make(chan error, 1), closeChan: make(chan error, 1),
log: log, log: log,
} }
session.eyeball.Store(&eyeball)
return session
} }
func (s *session) ID() RequestID { func (s *session) ID() RequestID {
return s.id return s.id
} }
func (s *session) ConnectionID() uint8 {
eyeball := *(s.eyeball.Load())
return eyeball.ID()
}
func (s *session) Migrate(eyeball DatagramConn) {
current := *(s.eyeball.Load())
// Only migrate if the connection ids are different.
if current.ID() != eyeball.ID() {
s.eyeball.Store(&eyeball)
}
// The session is already running so we want to restart the idle timeout since no proxied packets have come down yet.
s.markActive()
}
func (s *session) Serve(ctx context.Context) error { func (s *session) Serve(ctx context.Context) error {
go func() { go func() {
// QUIC implementation copies data to another buffer before returning https://github.com/quic-go/quic-go/blob/v0.24.0/session.go#L1967-L1975 // QUIC implementation copies data to another buffer before returning https://github.com/quic-go/quic-go/blob/v0.24.0/session.go#L1967-L1975
@ -107,9 +128,12 @@ func (s *session) Serve(ctx context.Context) error {
s.log.Error().Int("packetSize", n).Msg("Session (origin) packet read was too large and was dropped") s.log.Error().Int("packetSize", n).Msg("Session (origin) packet read was too large and was dropped")
continue continue
} }
// We need to synchronize on the eyeball in-case that the connection was migrated. This should be rarely a point
// of lock contention, as a migration can only happen during startup of a session before traffic flow.
eyeball := *(s.eyeball.Load())
// Sending a packet to the session does block on the [quic.Connection], however, this is okay because it // Sending a packet to the session does block on the [quic.Connection], however, this is okay because it
// will cause back-pressure to the kernel buffer if the writes are not fast enough to the edge. // will cause back-pressure to the kernel buffer if the writes are not fast enough to the edge.
err = s.eyeball.SendUDPSessionDatagram(readBuffer[:DatagramPayloadHeaderLen+n]) err = eyeball.SendUDPSessionDatagram(readBuffer[:DatagramPayloadHeaderLen+n])
if err != nil { if err != nil {
s.closeChan <- err s.closeChan <- err
return return
@ -137,6 +161,14 @@ func (s *session) Write(payload []byte) (n int, err error) {
return n, err return n, err
} }
// ResetIdleTimer will restart the current idle timer.
//
// This public method is used to allow operators of sessions the ability to extend the session using information that is
// known external to the session itself.
func (s *session) ResetIdleTimer() {
s.markActive()
}
// Sends the last active time to the idle checker loop without blocking. activeAtChan will only be full when there // Sends the last active time to the idle checker loop without blocking. activeAtChan will only be full when there
// are many concurrent read/write. It is fine to lose some precision // are many concurrent read/write. It is fine to lose some precision
func (s *session) markActive() { func (s *session) markActive() {

View File

@ -123,6 +123,45 @@ func TestSessionServe_OriginTooLarge(t *testing.T) {
} }
} }
func TestSessionServe_Migrate(t *testing.T) {
log := zerolog.Nop()
eyeball := newMockEyeball()
pipe1, pipe2 := net.Pipe()
session := v3.NewSession(testRequestID, 2*time.Second, pipe2, &eyeball, &log)
defer session.Close()
done := make(chan error)
go func() {
done <- session.Serve(context.Background())
}()
// Migrate the session to a new connection before origin sends data
eyeball2 := newMockEyeball()
eyeball2.connID = 1
session.Migrate(&eyeball2)
// Origin sends data
payload2 := []byte{0xde}
pipe1.Write(payload2)
// Expect write to eyeball2
data := <-eyeball2.recvData
if len(data) <= 17 || !slices.Equal(payload2, data[17:]) {
t.Fatalf("expected data to write to eyeball2 after migration: %+v", data)
}
select {
case data := <-eyeball.recvData:
t.Fatalf("expected no data to write to eyeball1 after migration: %+v", data)
default:
}
err := <-done
if !errors.Is(err, v3.SessionIdleErr{}) {
t.Error(err)
}
}
func TestSessionClose_Multiple(t *testing.T) { func TestSessionClose_Multiple(t *testing.T) {
log := zerolog.Nop() log := zerolog.Nop()
origin := newTestOrigin(makePayload(128)) origin := newTestOrigin(makePayload(128))
@ -249,7 +288,7 @@ func newTestIdleOrigin(d time.Duration) testIdleOrigin {
func (o *testIdleOrigin) Read(p []byte) (n int, err error) { func (o *testIdleOrigin) Read(p []byte) (n int, err error) {
time.Sleep(o.duration) time.Sleep(o.duration)
return 0, nil return -1, nil
} }
func (o *testIdleOrigin) Write(p []byte) (n int, err error) { func (o *testIdleOrigin) Write(p []byte) (n int, err error) {

View File

@ -606,6 +606,7 @@ func (e *EdgeTunnelServer) serveQUIC(
ctx, ctx,
conn, conn,
e.sessionManager, e.sessionManager,
connIndex,
connLogger.Logger(), connLogger.Logger(),
) )
} else { } else {