TUN-6720: Remove forcibly closing connection during reconnect signal
Previously, allowing the reconnect signal to forcibly close the connection caused a race condition over which error was returned by the errgroup in the tunnel connection. Allowing the signal to return, and letting the resulting context cancellation shut down the connection, provides a safer teardown of the tunnel for this test-only scenario.
commit cfef0e737f
parent 8ec0f7746b
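The pattern after this change, sketched below: the connection handler and the reconnect listener run in the same errgroup, and the listener returns the signal as an error instead of closing the connection itself, so the errgroup's context cancellation tears the handler down. This is a minimal, self-contained sketch; listenReconnect and the errgroup wiring mirror the diff, while serve, the channel setup, and the timing are illustrative stand-ins.

package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/sync/errgroup"
)

// ReconnectSignal doubles as an error so it can propagate through the errgroup.
type ReconnectSignal struct{}

func (r ReconnectSignal) Error() string { return "reconnect signal" }

// listenReconnect returns the signal as an error rather than closing the
// connection; the errgroup records it and cancels serveCtx for the handler.
func listenReconnect(ctx context.Context, reconnectCh <-chan ReconnectSignal) error {
	select {
	case r := <-reconnectCh:
		return r
	case <-ctx.Done():
		return nil
	}
}

// serve stands in for handler.ServeClassicTunnel / h2conn.Serve / quicConn.Serve:
// it runs until its context is canceled.
func serve(ctx context.Context) error {
	<-ctx.Done()
	return ctx.Err()
}

func main() {
	reconnectCh := make(chan ReconnectSignal, 1)
	errGroup, serveCtx := errgroup.WithContext(context.Background())

	errGroup.Go(func() error { return serve(serveCtx) })
	errGroup.Go(func() error { return listenReconnect(serveCtx, reconnectCh) })

	go func() {
		time.Sleep(100 * time.Millisecond)
		reconnectCh <- ReconnectSignal{}
	}()

	// Without a forced Close racing the handler, the first recorded error is
	// deterministically the reconnect signal; serve exits with
	// context.Canceled, which the errgroup discards.
	fmt.Println(errGroup.Wait()) // reconnect signal
}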
@@ -375,7 +375,7 @@ func StartServer(
 		errC <- metrics.ServeMetrics(metricsListener, ctx.Done(), readinessServer, quickTunnelURL, orchestrator, log)
 	}()
 
-	reconnectCh := make(chan supervisor.ReconnectSignal, 1)
+	reconnectCh := make(chan supervisor.ReconnectSignal, c.Int("ha-connections"))
 	if c.IsSet("stdin-control") {
 		log.Info().Msg("Enabling control through stdin")
 		go stdinControl(reconnectCh, log)
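Why the buffer size changes: the reworked test below sends one reconnect signal per HA connection back-to-back, so the channel needs room for ha-connections pending signals to keep those sends from blocking. A small illustrative sketch, where the constant stands in for c.Int("ha-connections"):

package main

import "fmt"

type ReconnectSignal struct{}

func main() {
	const haConnections = 4 // stand-in for c.Int("ha-connections")

	reconnectCh := make(chan ReconnectSignal, haConnections)

	// All sends complete without blocking; with the old buffer of 1, the
	// second send would block until a connection consumed a signal.
	for i := 0; i < haConnections; i++ {
		reconnectCh <- ReconnectSignal{}
	}
	fmt.Println("queued signals:", len(reconnectCh)) // queued signals: 4
}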
@@ -47,17 +47,12 @@ class TestReconnect:
         cloudflared.stdin.flush()
 
     def assert_reconnect(self, config, cloudflared, repeat):
-        wait_tunnel_ready(tunnel_url=config.get_url(), require_min_connections=self.default_ha_conns)
+        wait_tunnel_ready(tunnel_url=config.get_url(),
+                          require_min_connections=self.default_ha_conns)
         for _ in range(repeat):
-            for i in range(self.default_ha_conns):
+            for _ in range(self.default_ha_conns):
                 self.send_reconnect(cloudflared, self.default_reconnect_secs)
-                expect_connections = self.default_ha_conns-i-1
-                if expect_connections > 0:
-                    # Don't check if tunnel returns 200 here because there is a race condition between wait_tunnel_ready
-                    # retrying to get 200 response and reconnecting
-                    wait_tunnel_ready(require_min_connections=expect_connections)
-                else:
-                    check_tunnel_not_connected()
-
+            check_tunnel_not_connected()
             sleep(self.default_reconnect_secs * 2)
-            wait_tunnel_ready(tunnel_url=config.get_url(), require_min_connections=self.default_ha_conns)
+            wait_tunnel_ready(tunnel_url=config.get_url(),
+                              require_min_connections=self.default_ha_conns)
@@ -15,6 +15,7 @@ from constants import METRICS_PORT, MAX_RETRIES, BACKOFF_SECS
 
 LOGGER = logging.getLogger(__name__)
 
+
 def select_platform(plat):
     return pytest.mark.skipif(
         platform.system() != plat, reason=f"Only runs on {plat}")
@@ -108,13 +109,15 @@ def _log_cloudflared_logs(cfd_logs):
         LOGGER.warning(line)
 
 
-@retry(stop_max_attempt_number=MAX_RETRIES * BACKOFF_SECS, wait_fixed=1000)
+@retry(stop_max_attempt_number=MAX_RETRIES, wait_fixed=BACKOFF_SECS * 1000)
 def check_tunnel_not_connected():
     url = f'http://localhost:{METRICS_PORT}/ready'
 
     try:
-        resp = requests.get(url, timeout=1)
+        resp = requests.get(url, timeout=BACKOFF_SECS)
         assert resp.status_code == 503, f"Expect {url} returns 503, got {resp.status_code}"
+        assert resp.json()[
+            "readyConnections"] == 0, "Expected all connections to be terminated (pending reconnect)"
     # cloudflared might already terminate
     except requests.exceptions.ConnectionError as e:
         LOGGER.warning(f"Failed to connect to {url}, error: {e}")
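In the retrying library, stop_max_attempt_number caps the number of attempts and wait_fixed is the pause between them in milliseconds, so the old decorator made MAX_RETRIES * BACKOFF_SECS quick one-second attempts while the new one makes MAX_RETRIES attempts spaced BACKOFF_SECS apart: the same overall window, polling at the intended backoff. Roughly equivalent to this hand-rolled loop (a sketch; the constant values are illustrative stand-ins for constants.py):

import time

MAX_RETRIES = 5   # stand-in value
BACKOFF_SECS = 7  # stand-in value


def retry_fixed(check):
    """Roughly what @retry(stop_max_attempt_number=MAX_RETRIES,
    wait_fixed=BACKOFF_SECS * 1000) does for a failing check."""
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            return check()
        except AssertionError:
            if attempt == MAX_RETRIES:
                raise
            time.sleep(BACKOFF_SECS)  # wait_fixed is in ms; sleep takes seconds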
@@ -295,8 +295,7 @@ func (s *Supervisor) initialize(
 			s.config.ProtocolSelector.Current(),
 			false,
 		}
-		ch := signal.New(make(chan struct{}))
-		go s.startTunnel(ctx, i, ch)
+		go s.startTunnel(ctx, i, s.newConnectedTunnelSignal(i))
 		time.Sleep(registrationInterval)
 	}
 	return nil
@@ -546,7 +546,13 @@ func (e *EdgeTunnelServer) serveH2mux(
 	})
 
 	errGroup.Go(func() error {
-		return listenReconnect(serveCtx, e.reconnectCh, e.gracefulShutdownC)
+		err := listenReconnect(serveCtx, e.reconnectCh, e.gracefulShutdownC)
+		if err != nil {
+			// forcefully break the connection (this is only used for testing)
+			// errgroup will return context canceled for the handler.ServeClassicTunnel
+			connLog.Logger().Debug().Msg("Forcefully breaking h2mux connection")
+		}
+		return err
 	})
 
 	return errGroup.Wait()
@@ -580,8 +586,8 @@ func (e *EdgeTunnelServer) serveHTTP2(
 		err := listenReconnect(serveCtx, e.reconnectCh, e.gracefulShutdownC)
 		if err != nil {
 			// forcefully break the connection (this is only used for testing)
+			// errgroup will return context canceled for the h2conn.Serve
 			connLog.Logger().Debug().Msg("Forcefully breaking http2 connection")
-			_ = tlsServerConn.Close()
 		}
 		return err
 	})
@@ -636,8 +642,8 @@ func (e *EdgeTunnelServer) serveQUIC(
 		err := listenReconnect(serveCtx, e.reconnectCh, e.gracefulShutdownC)
 		if err != nil {
 			// forcefully break the connection (this is only used for testing)
+			// errgroup will return context canceled for the quicConn.Serve
 			connLogger.Logger().Debug().Msg("Forcefully breaking quic connection")
-			quicConn.Close()
 		}
 		return err
 	})