2022-02-07 09:42:07 +00:00
|
|
|
package supervisor
|
2020-10-14 13:42:00 +00:00
|
|
|
|
|
|
|
import (
|
|
|
|
"testing"
|
|
|
|
"time"
|
|
|
|
|
2023-05-06 00:42:41 +00:00
|
|
|
"github.com/quic-go/quic-go"
|
2020-11-25 06:55:13 +00:00
|
|
|
"github.com/rs/zerolog"
|
2020-10-14 13:42:00 +00:00
|
|
|
"github.com/stretchr/testify/assert"
|
2021-03-23 14:30:43 +00:00
|
|
|
|
|
|
|
"github.com/cloudflare/cloudflared/connection"
|
2021-10-11 10:31:05 +00:00
|
|
|
"github.com/cloudflare/cloudflared/edgediscovery"
|
2021-03-26 04:04:56 +00:00
|
|
|
"github.com/cloudflare/cloudflared/retry"
|
2020-10-14 13:42:00 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
type dynamicMockFetcher struct {
|
2021-10-11 10:31:05 +00:00
|
|
|
protocolPercents edgediscovery.ProtocolPercents
|
|
|
|
err error
|
2020-10-14 13:42:00 +00:00
|
|
|
}
|
|
|
|
|
2023-02-06 19:06:02 +00:00
|
|
|
func (dmf *dynamicMockFetcher) fetch() edgediscovery.PercentageFetcher {
|
2021-10-11 10:31:05 +00:00
|
|
|
return func() (edgediscovery.ProtocolPercents, error) {
|
|
|
|
return dmf.protocolPercents, dmf.err
|
2020-10-14 13:42:00 +00:00
|
|
|
}
|
|
|
|
}
|
2021-02-05 00:07:49 +00:00
|
|
|
|
2024-05-23 16:48:34 +00:00
|
|
|
// immediateTimeAfter has the same signature as time.After but ignores the
// requested duration: the channel it returns already holds the current time,
// so the first receive never blocks. The channel has capacity 1 and is never
// closed, so only that single value is ever delivered.
func immediateTimeAfter(time.Duration) <-chan time.Time {
	c := make(chan time.Time, 1)
	c <- time.Now()
	return c
}
|
|
|
|
|
2020-10-14 13:42:00 +00:00
|
|
|
func TestWaitForBackoffFallback(t *testing.T) {
|
|
|
|
maxRetries := uint(3)
|
2024-05-23 16:48:34 +00:00
|
|
|
backoff := retry.NewBackoff(maxRetries, 40*time.Millisecond, false)
|
|
|
|
backoff.Clock.After = immediateTimeAfter
|
2020-11-25 06:55:13 +00:00
|
|
|
log := zerolog.Nop()
|
2024-05-23 16:48:34 +00:00
|
|
|
resolveTTL := 10 * time.Second
|
2020-10-14 13:42:00 +00:00
|
|
|
mockFetcher := dynamicMockFetcher{
|
2023-02-06 19:06:02 +00:00
|
|
|
protocolPercents: edgediscovery.ProtocolPercents{edgediscovery.ProtocolPercent{Protocol: "quic", Percentage: 100}},
|
2020-10-14 13:42:00 +00:00
|
|
|
}
|
2020-11-25 06:55:13 +00:00
|
|
|
protocolSelector, err := connection.NewProtocolSelector(
|
2022-01-05 17:58:49 +00:00
|
|
|
"auto",
|
2023-02-06 19:06:02 +00:00
|
|
|
"",
|
|
|
|
false,
|
|
|
|
false,
|
2020-11-25 06:55:13 +00:00
|
|
|
mockFetcher.fetch(),
|
|
|
|
resolveTTL,
|
|
|
|
&log,
|
|
|
|
)
|
2020-10-14 13:42:00 +00:00
|
|
|
assert.NoError(t, err)
|
|
|
|
|
|
|
|
initProtocol := protocolSelector.Current()
|
2023-02-06 19:06:02 +00:00
|
|
|
assert.Equal(t, connection.QUIC, initProtocol)
|
2020-10-14 13:42:00 +00:00
|
|
|
|
TUN-5719: Re-attempt connection to edge with QUIC despite network error when there is no fallback
We have made 2 changes in the past that caused an unexpected edge case:
1. when faced with QUIC "no network activity", give up re-attempts and fall-back
2. when a protocol is chosen explicitly, rather than using auto (the default), do not fallback
The reasoning for 1. was to fallback quickly in situations where the user may not
have chosen QUIC, and simply got it because we auto-chose it (with the TXT DNS record),
but the users' environment does not allow egress via UDP.
The reasoning for 2. was to avoid falling back if the user explicitly chooses a
protocol. E.g., if the user chooses QUIC, she may want to do UDP proxying, so if
we fallback to HTTP2 protocol that will be unexpected since it does not support
UDP (and same applies for HTTP2 falling back to h2mux and TCP proxying).
This PR fixes the edge case that happens when both those changes 1. and 2. are
put together: when faced with a QUIC "no network activity", we should only try
to fallback if there is a possible fallback. Otherwise, we should exhaust the
retries as normal.
2022-01-27 22:12:25 +00:00
|
|
|
protoFallback := &protocolFallback{
|
2020-10-14 13:42:00 +00:00
|
|
|
backoff,
|
|
|
|
initProtocol,
|
|
|
|
false,
|
|
|
|
}
|
|
|
|
|
|
|
|
// Retry #0 and #1. At retry #2, we switch protocol, so the fallback loop has one more retry than this
|
|
|
|
for i := 0; i < int(maxRetries-1); i++ {
|
TUN-5719: Re-attempt connection to edge with QUIC despite network error when there is no fallback
We have made 2 changes in the past that caused an unexpected edge case:
1. when faced with QUIC "no network activity", give up re-attempts and fall-back
2. when a protocol is chosen explicitly, rather than using auto (the default), do not fallback
The reasoning for 1. was to fallback quickly in situations where the user may not
have chosen QUIC, and simply got it because we auto-chose it (with the TXT DNS record),
but the users' environment does not allow egress via UDP.
The reasoning for 2. was to avoid falling back if the user explicitly chooses a
protocol. E.g., if the user chooses QUIC, she may want to do UDP proxying, so if
we fallback to HTTP2 protocol that will be unexpected since it does not support
UDP (and same applies for HTTP2 falling back to h2mux and TCP proxying).
This PR fixes the edge case that happens when both those changes 1. and 2. are
put together: when faced with a QUIC "no network activity", we should only try
to fallback if there is a possible fallback. Otherwise, we should exhaust the
retries as normal.
2022-01-27 22:12:25 +00:00
|
|
|
protoFallback.BackoffTimer() // simulate retry
|
|
|
|
ok := selectNextProtocol(&log, protoFallback, protocolSelector, nil)
|
2021-02-05 00:07:49 +00:00
|
|
|
assert.True(t, ok)
|
TUN-5719: Re-attempt connection to edge with QUIC despite network error when there is no fallback
We have made 2 changes in the past that caused an unexpected edge case:
1. when faced with QUIC "no network activity", give up re-attempts and fall-back
2. when a protocol is chosen explicitly, rather than using auto (the default), do not fallback
The reasoning for 1. was to fallback quickly in situations where the user may not
have chosen QUIC, and simply got it because we auto-chose it (with the TXT DNS record),
but the users' environment does not allow egress via UDP.
The reasoning for 2. was to avoid falling back if the user explicitly chooses a
protocol. E.g., if the user chooses QUIC, she may want to do UDP proxying, so if
we fallback to HTTP2 protocol that will be unexpected since it does not support
UDP (and same applies for HTTP2 falling back to h2mux and TCP proxying).
This PR fixes the edge case that happens when both those changes 1. and 2. are
put together: when faced with a QUIC "no network activity", we should only try
to fallback if there is a possible fallback. Otherwise, we should exhaust the
retries as normal.
2022-01-27 22:12:25 +00:00
|
|
|
assert.Equal(t, initProtocol, protoFallback.protocol)
|
2020-10-14 13:42:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Retry fallback protocol
|
2024-05-23 16:48:34 +00:00
|
|
|
protoFallback.BackoffTimer() // simulate retry
|
|
|
|
ok := selectNextProtocol(&log, protoFallback, protocolSelector, nil)
|
|
|
|
assert.True(t, ok)
|
|
|
|
fallback, ok := protocolSelector.Fallback()
|
|
|
|
assert.True(t, ok)
|
|
|
|
assert.Equal(t, fallback, protoFallback.protocol)
|
|
|
|
assert.Equal(t, connection.HTTP2, protoFallback.protocol)
|
2020-10-14 13:42:00 +00:00
|
|
|
|
|
|
|
currentGlobalProtocol := protocolSelector.Current()
|
|
|
|
assert.Equal(t, initProtocol, currentGlobalProtocol)
|
|
|
|
|
2024-05-23 16:48:34 +00:00
|
|
|
// Simulate max retries again (retries reset after protocol switch)
|
|
|
|
for i := 0; i < int(maxRetries); i++ {
|
|
|
|
protoFallback.BackoffTimer()
|
|
|
|
}
|
2020-10-14 13:42:00 +00:00
|
|
|
// No protocol to fallback, return error
|
2024-05-23 16:48:34 +00:00
|
|
|
ok = selectNextProtocol(&log, protoFallback, protocolSelector, nil)
|
2021-02-05 00:07:49 +00:00
|
|
|
assert.False(t, ok)
|
2020-10-14 13:42:00 +00:00
|
|
|
|
TUN-5719: Re-attempt connection to edge with QUIC despite network error when there is no fallback
We have made 2 changes in the past that caused an unexpected edge case:
1. when faced with QUIC "no network activity", give up re-attempts and fall-back
2. when a protocol is chosen explicitly, rather than using auto (the default), do not fallback
The reasoning for 1. was to fallback quickly in situations where the user may not
have chosen QUIC, and simply got it because we auto-chose it (with the TXT DNS record),
but the users' environment does not allow egress via UDP.
The reasoning for 2. was to avoid falling back if the user explicitly chooses a
protocol. E.g., if the user chooses QUIC, she may want to do UDP proxying, so if
we fallback to HTTP2 protocol that will be unexpected since it does not support
UDP (and same applies for HTTP2 falling back to h2mux and TCP proxying).
This PR fixes the edge case that happens when both those changes 1. and 2. are
put together: when faced with a QUIC "no network activity", we should only try
to fallback if there is a possible fallback. Otherwise, we should exhaust the
retries as normal.
2022-01-27 22:12:25 +00:00
|
|
|
protoFallback.reset()
|
|
|
|
protoFallback.BackoffTimer() // simulate retry
|
|
|
|
ok = selectNextProtocol(&log, protoFallback, protocolSelector, nil)
|
2021-02-05 00:07:49 +00:00
|
|
|
assert.True(t, ok)
|
TUN-5719: Re-attempt connection to edge with QUIC despite network error when there is no fallback
We have made 2 changes in the past that caused an unexpected edge case:
1. when faced with QUIC "no network activity", give up re-attempts and fall-back
2. when a protocol is chosen explicitly, rather than using auto (the default), do not fallback
The reasoning for 1. was to fallback quickly in situations where the user may not
have chosen QUIC, and simply got it because we auto-chose it (with the TXT DNS record),
but the users' environment does not allow egress via UDP.
The reasoning for 2. was to avoid falling back if the user explicitly chooses a
protocol. E.g., if the user chooses QUIC, she may want to do UDP proxying, so if
we fallback to HTTP2 protocol that will be unexpected since it does not support
UDP (and same applies for HTTP2 falling back to h2mux and TCP proxying).
This PR fixes the edge case that happens when both those changes 1. and 2. are
put together: when faced with a QUIC "no network activity", we should only try
to fallback if there is a possible fallback. Otherwise, we should exhaust the
retries as normal.
2022-01-27 22:12:25 +00:00
|
|
|
assert.Equal(t, initProtocol, protoFallback.protocol)
|
2021-11-03 12:06:04 +00:00
|
|
|
|
TUN-5719: Re-attempt connection to edge with QUIC despite network error when there is no fallback
We have made 2 changes in the past that caused an unexpected edge case:
1. when faced with QUIC "no network activity", give up re-attempts and fall-back
2. when a protocol is chosen explicitly, rather than using auto (the default), do not fallback
The reasoning for 1. was to fallback quickly in situations where the user may not
have chosen QUIC, and simply got it because we auto-chose it (with the TXT DNS record),
but the users' environment does not allow egress via UDP.
The reasoning for 2. was to avoid falling back if the user explicitly chooses a
protocol. E.g., if the user chooses QUIC, she may want to do UDP proxying, so if
we fallback to HTTP2 protocol that will be unexpected since it does not support
UDP (and same applies for HTTP2 falling back to h2mux and TCP proxying).
This PR fixes the edge case that happens when both those changes 1. and 2. are
put together: when faced with a QUIC "no network activity", we should only try
to fallback if there is a possible fallback. Otherwise, we should exhaust the
retries as normal.
2022-01-27 22:12:25 +00:00
|
|
|
protoFallback.reset()
|
|
|
|
protoFallback.BackoffTimer() // simulate retry
|
|
|
|
ok = selectNextProtocol(&log, protoFallback, protocolSelector, &quic.IdleTimeoutError{})
|
2021-11-03 12:06:04 +00:00
|
|
|
// Check that we get a true after the first try itself when this flag is true. This allows us to immediately
|
TUN-5719: Re-attempt connection to edge with QUIC despite network error when there is no fallback
We have made 2 changes in the past that caused an unexpected edge case:
1. when faced with QUIC "no network activity", give up re-attempts and fall-back
2. when a protocol is chosen explicitly, rather than using auto (the default), do not fallback
The reasoning for 1. was to fallback quickly in situations where the user may not
have chosen QUIC, and simply got it because we auto-chose it (with the TXT DNS record),
but the users' environment does not allow egress via UDP.
The reasoning for 2. was to avoid falling back if the user explicitly chooses a
protocol. E.g., if the user chooses QUIC, she may want to do UDP proxying, so if
we fallback to HTTP2 protocol that will be unexpected since it does not support
UDP (and same applies for HTTP2 falling back to h2mux and TCP proxying).
This PR fixes the edge case that happens when both those changes 1. and 2. are
put together: when faced with a QUIC "no network activity", we should only try
to fallback if there is a possible fallback. Otherwise, we should exhaust the
retries as normal.
2022-01-27 22:12:25 +00:00
|
|
|
// switch protocols when there is a fallback.
|
2021-11-03 12:06:04 +00:00
|
|
|
assert.True(t, ok)
|
TUN-5719: Re-attempt connection to edge with QUIC despite network error when there is no fallback
We have made 2 changes in the past that caused an unexpected edge case:
1. when faced with QUIC "no network activity", give up re-attempts and fall-back
2. when a protocol is chosen explicitly, rather than using auto (the default), do not fallback
The reasoning for 1. was to fallback quickly in situations where the user may not
have chosen QUIC, and simply got it because we auto-chose it (with the TXT DNS record),
but the users' environment does not allow egress via UDP.
The reasoning for 2. was to avoid falling back if the user explicitly chooses a
protocol. E.g., if the user chooses QUIC, she may want to do UDP proxying, so if
we fallback to HTTP2 protocol that will be unexpected since it does not support
UDP (and same applies for HTTP2 falling back to h2mux and TCP proxying).
This PR fixes the edge case that happens when both those changes 1. and 2. are
put together: when faced with a QUIC "no network activity", we should only try
to fallback if there is a possible fallback. Otherwise, we should exhaust the
retries as normal.
2022-01-27 22:12:25 +00:00
|
|
|
|
|
|
|
// But if there is no fallback available, then we exhaust the retries despite the type of error.
|
|
|
|
// The reason why there's no fallback available is because we pick a specific protocol instead of letting it be auto.
|
|
|
|
protocolSelector, err = connection.NewProtocolSelector(
|
|
|
|
"quic",
|
2023-02-06 19:06:02 +00:00
|
|
|
"",
|
|
|
|
false,
|
|
|
|
false,
|
TUN-5719: Re-attempt connection to edge with QUIC despite network error when there is no fallback
We have made 2 changes in the past that caused an unexpected edge case:
1. when faced with QUIC "no network activity", give up re-attempts and fall-back
2. when a protocol is chosen explicitly, rather than using auto (the default), do not fallback
The reasoning for 1. was to fallback quickly in situations where the user may not
have chosen QUIC, and simply got it because we auto-chose it (with the TXT DNS record),
but the users' environment does not allow egress via UDP.
The reasoning for 2. was to avoid falling back if the user explicitly chooses a
protocol. E.g., if the user chooses QUIC, she may want to do UDP proxying, so if
we fallback to HTTP2 protocol that will be unexpected since it does not support
UDP (and same applies for HTTP2 falling back to h2mux and TCP proxying).
This PR fixes the edge case that happens when both those changes 1. and 2. are
put together: when faced with a QUIC "no network activity", we should only try
to fallback if there is a possible fallback. Otherwise, we should exhaust the
retries as normal.
2022-01-27 22:12:25 +00:00
|
|
|
mockFetcher.fetch(),
|
|
|
|
resolveTTL,
|
|
|
|
&log,
|
|
|
|
)
|
|
|
|
assert.NoError(t, err)
|
|
|
|
protoFallback = &protocolFallback{backoff, protocolSelector.Current(), false}
|
|
|
|
for i := 0; i < int(maxRetries-1); i++ {
|
|
|
|
protoFallback.BackoffTimer() // simulate retry
|
|
|
|
ok := selectNextProtocol(&log, protoFallback, protocolSelector, &quic.IdleTimeoutError{})
|
|
|
|
assert.True(t, ok)
|
|
|
|
assert.Equal(t, connection.QUIC, protoFallback.protocol)
|
|
|
|
}
|
|
|
|
// And finally it fails as it should, with no fallback.
|
|
|
|
protoFallback.BackoffTimer()
|
|
|
|
ok = selectNextProtocol(&log, protoFallback, protocolSelector, &quic.IdleTimeoutError{})
|
|
|
|
assert.False(t, ok)
|
2020-10-14 13:42:00 +00:00
|
|
|
}
|