TUN-5719: Re-attempt connection to edge with QUIC despite network error when there is no fallback
We have made two changes in the past that, combined, caused an unexpected edge case: (1) when faced with a QUIC "no network activity" error, give up re-attempts and fall back; (2) when a protocol is chosen explicitly, rather than using auto (the default), do not fall back. The reasoning for (1) was to fall back quickly in situations where the user may not have chosen QUIC and simply got it because we auto-chose it (via the TXT DNS record), but the user's environment does not allow egress via UDP. The reasoning for (2) was to avoid falling back if the user explicitly chooses a protocol. E.g., if the user chooses QUIC, she may want to do UDP proxying, so falling back to the HTTP2 protocol would be unexpected since it does not support UDP (and the same applies to HTTP2 falling back to h2mux and TCP proxying). This PR fixes the edge case that happens when changes (1) and (2) are put together: when faced with a QUIC "no network activity" error, we should only try to fall back if there is a possible fallback. Otherwise, we should exhaust the retries as normal.
This commit is contained in:
		
							parent
							
								
									8a5343d0a5
								
							
						
					
					
						commit
						7bac4b15b0
					
				|  | @ -185,12 +185,11 @@ func ServeTunnelLoop( | ||||||
| 		case <-gracefulShutdownC: | 		case <-gracefulShutdownC: | ||||||
| 			return nil | 			return nil | ||||||
| 		case <-protocolFallback.BackoffTimer(): | 		case <-protocolFallback.BackoffTimer(): | ||||||
| 			var idleTimeoutError *quic.IdleTimeoutError |  | ||||||
| 			if !selectNextProtocol( | 			if !selectNextProtocol( | ||||||
| 				connLog.Logger(), | 				connLog.Logger(), | ||||||
| 				protocolFallback, | 				protocolFallback, | ||||||
| 				config.ProtocolSelector, | 				config.ProtocolSelector, | ||||||
| 				errors.As(err, &idleTimeoutError), | 				err, | ||||||
| 			) { | 			) { | ||||||
| 				return err | 				return err | ||||||
| 			} | 			} | ||||||
|  | @ -223,9 +222,13 @@ func selectNextProtocol( | ||||||
| 	connLog *zerolog.Logger, | 	connLog *zerolog.Logger, | ||||||
| 	protocolBackoff *protocolFallback, | 	protocolBackoff *protocolFallback, | ||||||
| 	selector connection.ProtocolSelector, | 	selector connection.ProtocolSelector, | ||||||
| 	isNetworkActivityTimeout bool, | 	cause error, | ||||||
| ) bool { | ) bool { | ||||||
| 	if protocolBackoff.ReachedMaxRetries() || isNetworkActivityTimeout { | 	var idleTimeoutError *quic.IdleTimeoutError | ||||||
|  | 	isNetworkActivityTimeout := errors.As(cause, &idleTimeoutError) | ||||||
|  | 	_, hasFallback := selector.Fallback() | ||||||
|  | 
 | ||||||
|  | 	if protocolBackoff.ReachedMaxRetries() || (hasFallback && isNetworkActivityTimeout) { | ||||||
| 		fallback, hasFallback := selector.Fallback() | 		fallback, hasFallback := selector.Fallback() | ||||||
| 		if !hasFallback { | 		if !hasFallback { | ||||||
| 			return false | 			return false | ||||||
|  |  | ||||||
|  | @ -4,6 +4,7 @@ import ( | ||||||
| 	"testing" | 	"testing" | ||||||
| 	"time" | 	"time" | ||||||
| 
 | 
 | ||||||
|  | 	"github.com/lucas-clemente/quic-go" | ||||||
| 	"github.com/rs/zerolog" | 	"github.com/rs/zerolog" | ||||||
| 	"github.com/stretchr/testify/assert" | 	"github.com/stretchr/testify/assert" | ||||||
| 
 | 
 | ||||||
|  | @ -53,7 +54,7 @@ func TestWaitForBackoffFallback(t *testing.T) { | ||||||
| 	initProtocol := protocolSelector.Current() | 	initProtocol := protocolSelector.Current() | ||||||
| 	assert.Equal(t, connection.HTTP2, initProtocol) | 	assert.Equal(t, connection.HTTP2, initProtocol) | ||||||
| 
 | 
 | ||||||
| 	protocolFallback := &protocolFallback{ | 	protoFallback := &protocolFallback{ | ||||||
| 		backoff, | 		backoff, | ||||||
| 		initProtocol, | 		initProtocol, | ||||||
| 		false, | 		false, | ||||||
|  | @ -61,40 +62,63 @@ func TestWaitForBackoffFallback(t *testing.T) { | ||||||
| 
 | 
 | ||||||
| 	// Retry #0 and #1. At retry #2, we switch protocol, so the fallback loop has one more retry than this
 | 	// Retry #0 and #1. At retry #2, we switch protocol, so the fallback loop has one more retry than this
 | ||||||
| 	for i := 0; i < int(maxRetries-1); i++ { | 	for i := 0; i < int(maxRetries-1); i++ { | ||||||
| 		protocolFallback.BackoffTimer() // simulate retry
 | 		protoFallback.BackoffTimer() // simulate retry
 | ||||||
| 		ok := selectNextProtocol(&log, protocolFallback, protocolSelector, false) | 		ok := selectNextProtocol(&log, protoFallback, protocolSelector, nil) | ||||||
| 		assert.True(t, ok) | 		assert.True(t, ok) | ||||||
| 		assert.Equal(t, initProtocol, protocolFallback.protocol) | 		assert.Equal(t, initProtocol, protoFallback.protocol) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	// Retry fallback protocol
 | 	// Retry fallback protocol
 | ||||||
| 	for i := 0; i < int(maxRetries); i++ { | 	for i := 0; i < int(maxRetries); i++ { | ||||||
| 		protocolFallback.BackoffTimer() // simulate retry
 | 		protoFallback.BackoffTimer() // simulate retry
 | ||||||
| 		ok := selectNextProtocol(&log, protocolFallback, protocolSelector, false) | 		ok := selectNextProtocol(&log, protoFallback, protocolSelector, nil) | ||||||
| 		assert.True(t, ok) | 		assert.True(t, ok) | ||||||
| 		fallback, ok := protocolSelector.Fallback() | 		fallback, ok := protocolSelector.Fallback() | ||||||
| 		assert.True(t, ok) | 		assert.True(t, ok) | ||||||
| 		assert.Equal(t, fallback, protocolFallback.protocol) | 		assert.Equal(t, fallback, protoFallback.protocol) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	currentGlobalProtocol := protocolSelector.Current() | 	currentGlobalProtocol := protocolSelector.Current() | ||||||
| 	assert.Equal(t, initProtocol, currentGlobalProtocol) | 	assert.Equal(t, initProtocol, currentGlobalProtocol) | ||||||
| 
 | 
 | ||||||
| 	// No protocol to fallback, return error
 | 	// No protocol to fallback, return error
 | ||||||
| 	protocolFallback.BackoffTimer() // simulate retry
 | 	protoFallback.BackoffTimer() // simulate retry
 | ||||||
| 	ok := selectNextProtocol(&log, protocolFallback, protocolSelector, false) | 	ok := selectNextProtocol(&log, protoFallback, protocolSelector, nil) | ||||||
| 	assert.False(t, ok) | 	assert.False(t, ok) | ||||||
| 
 | 
 | ||||||
| 	protocolFallback.reset() | 	protoFallback.reset() | ||||||
| 	protocolFallback.BackoffTimer() // simulate retry
 | 	protoFallback.BackoffTimer() // simulate retry
 | ||||||
| 	ok = selectNextProtocol(&log, protocolFallback, protocolSelector, false) | 	ok = selectNextProtocol(&log, protoFallback, protocolSelector, nil) | ||||||
| 	assert.True(t, ok) | 	assert.True(t, ok) | ||||||
| 	assert.Equal(t, initProtocol, protocolFallback.protocol) | 	assert.Equal(t, initProtocol, protoFallback.protocol) | ||||||
| 
 | 
 | ||||||
| 	protocolFallback.reset() | 	protoFallback.reset() | ||||||
| 	protocolFallback.BackoffTimer() // simulate retry
 | 	protoFallback.BackoffTimer() // simulate retry
 | ||||||
| 	ok = selectNextProtocol(&log, protocolFallback, protocolSelector, true) | 	ok = selectNextProtocol(&log, protoFallback, protocolSelector, &quic.IdleTimeoutError{}) | ||||||
| 	// Check that we get a true after the first try itself when this flag is true. This allows us to immediately
 | 	// Check that we get a true after the first try itself when this flag is true. This allows us to immediately
 | ||||||
| 	// switch protocols.
 | 	// switch protocols when there is a fallback.
 | ||||||
| 	assert.True(t, ok) | 	assert.True(t, ok) | ||||||
|  | 
 | ||||||
|  | 	// But if there is no fallback available, then we exhaust the retries despite the type of error.
 | ||||||
|  | 	// The reason why there's no fallback available is because we pick a specific protocol instead of letting it be auto.
 | ||||||
|  | 	protocolSelector, err = connection.NewProtocolSelector( | ||||||
|  | 		"quic", | ||||||
|  | 		warpRoutingEnabled, | ||||||
|  | 		namedTunnel, | ||||||
|  | 		mockFetcher.fetch(), | ||||||
|  | 		resolveTTL, | ||||||
|  | 		&log, | ||||||
|  | 	) | ||||||
|  | 	assert.NoError(t, err) | ||||||
|  | 	protoFallback = &protocolFallback{backoff, protocolSelector.Current(), false} | ||||||
|  | 	for i := 0; i < int(maxRetries-1); i++ { | ||||||
|  | 		protoFallback.BackoffTimer() // simulate retry
 | ||||||
|  | 		ok := selectNextProtocol(&log, protoFallback, protocolSelector, &quic.IdleTimeoutError{}) | ||||||
|  | 		assert.True(t, ok) | ||||||
|  | 		assert.Equal(t, connection.QUIC, protoFallback.protocol) | ||||||
|  | 	} | ||||||
|  | 	// And finally it fails as it should, with no fallback.
 | ||||||
|  | 	protoFallback.BackoffTimer() | ||||||
|  | 	ok = selectNextProtocol(&log, protoFallback, protocolSelector, &quic.IdleTimeoutError{}) | ||||||
|  | 	assert.False(t, ok) | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue