Merge branch 'cloudflare:master' into tunnel-health

This commit is contained in:
Mads Jon Nielsen 2024-10-12 19:21:03 +02:00 committed by GitHub
commit f2016e7f63
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 302 additions and 30 deletions

View File

@ -1,3 +1,7 @@
## 2024.10.0
### Bug Fixes
- We fixed a bug related to `--grace-period`. Tunnels that use QUIC as transport weren't abiding by this waiting period before forcefully closing the connections to the edge. From now on, both QUIC and HTTP2 tunnels will wait for either the grace period to end (defaults to 30 seconds) or until the last in-flight request is handled. Users that wish to maintain the previous behavior should set `--grace-period` to 0 if `--protocol` is set to `quic`. This will force `cloudflared` to shutdown as soon as either SIGTERM or SIGINT is received.
## 2024.2.1
### Notices
- Starting from this version, tunnel diagnostics will be enabled by default. This will allow the engineering team to remotely get diagnostics from cloudflared during debug activities. Users still have the capability to opt-out of this feature by defining `--management-diagnostics=false` (or env `TUNNEL_MANAGEMENT_DIAGNOSTICS`).

View File

@ -1,3 +1,15 @@
2024.10.0
- 2024-10-01 TUN-8646: Add datagram v3 support feature flag
- 2024-09-30 TUN-8621: Fix cloudflared version in change notes to account for release date
- 2024-09-19 Adding semgrep yaml file
- 2024-09-12 TUN-8632: Delay checking auto-update by the provided frequency
- 2024-09-11 TUN-8630: Check checksum of downloaded binary to compare to current for auto-updating
- 2024-09-09 TUN-8629: Cloudflared update on Windows requires running it twice to update
- 2024-09-06 PPIP-2310: Update quick tunnel disclaimer
- 2024-08-30 TUN-8621: Prevent QUIC connection from closing before grace period after unregistering
- 2024-08-09 TUN-8592: Use metadata from the edge to determine if request body is empty for QUIC transport
- 2024-06-26 TUN-8484: Print response when QuickTunnel can't be unmarshalled
2024.9.1
- 2024-09-10 Revert Release 2024.9.0

View File

@ -3,6 +3,7 @@ package tunnel
import (
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
@ -15,10 +16,7 @@ import (
const httpTimeout = 15 * time.Second
const disclaimer = "Thank you for trying Cloudflare Tunnel. Doing so, without a Cloudflare account, is a quick way to" +
" experiment and try it out. However, be aware that these account-less Tunnels have no uptime guarantee. If you " +
"intend to use Tunnels in production you should use a pre-created named tunnel by following: " +
"https://developers.cloudflare.com/cloudflare-one/connections/connect-apps"
const disclaimer = "Thank you for trying Cloudflare Tunnel. Doing so, without a Cloudflare account, is a quick way to experiment and try it out. However, be aware that these account-less Tunnels have no uptime guarantee, are subject to the Cloudflare Online Services Terms of Use (https://www.cloudflare.com/website-terms/), and Cloudflare reserves the right to investigate your use of Tunnels for violations of such terms. If you intend to use Tunnels in production you should use a pre-created named tunnel by following: https://developers.cloudflare.com/cloudflare-one/connections/connect-apps"
// RunQuickTunnel requests a tunnel from the specified service.
// We use this to power quick tunnels on trycloudflare.com, but the
@ -47,8 +45,17 @@ func RunQuickTunnel(sc *subcommandContext) error {
}
defer resp.Body.Close()
// This will read the entire response into memory so we can print it in case of error
rsp_body, err := io.ReadAll(resp.Body)
if err != nil {
return errors.Wrap(err, "failed to read quick-tunnel response")
}
var data QuickTunnelResponse
if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
if err := json.Unmarshal(rsp_body, &data); err != nil {
rsp_string := string(rsp_body)
fields := map[string]interface{}{"status_code": resp.Status}
sc.log.Err(err).Fields(fields).Msgf("Error unmarshaling QuickTunnel response: %s", rsp_string)
return errors.Wrap(err, "failed to unmarshal quick Tunnel")
}

View File

@ -45,9 +45,10 @@ class TestTermination:
with connected:
connected.wait(self.timeout)
# Send signal after the SSE connection is established
self.terminate_by_signal(cloudflared, signal)
self.wait_eyeball_thread(
in_flight_req, self.grace_period + self.timeout)
with self.within_grace_period():
self.terminate_by_signal(cloudflared, signal)
self.wait_eyeball_thread(
in_flight_req, self.grace_period + self.timeout)
# test cloudflared terminates before grace period expires when all eyeball
# connections are drained
@ -66,7 +67,7 @@ class TestTermination:
with connected:
connected.wait(self.timeout)
with self.within_grace_period():
with self.within_grace_period(has_connection=False):
# Send signal after the SSE connection is established
self.terminate_by_signal(cloudflared, signal)
self.wait_eyeball_thread(in_flight_req, self.grace_period)
@ -78,7 +79,7 @@ class TestTermination:
with start_cloudflared(
tmp_path, config, cfd_pre_args=["tunnel", "--ha-connections", "1"], new_process=True, capture_output=False) as cloudflared:
wait_tunnel_ready(tunnel_url=config.get_url())
with self.within_grace_period():
with self.within_grace_period(has_connection=False):
self.terminate_by_signal(cloudflared, signal)
def terminate_by_signal(self, cloudflared, sig):
@ -92,13 +93,21 @@ class TestTermination:
# Using this context asserts logic within the context is executed within grace period
@contextmanager
def within_grace_period(self):
def within_grace_period(self, has_connection=True):
try:
start = time.time()
yield
finally:
# If the request takes longer than the grace period then we need to wait at most the grace period.
# If the request fell within the grace period cloudflared can close earlier, but to ensure that it doesn't
# close immediately we add a minimum boundary. If cloudflared shutdown in less than 1s it's likely that
# it shutdown as soon as it received SIGINT. The only way cloudflared can close immediately is if it has no
# in-flight requests
minimum = 1 if has_connection else 0
duration = time.time() - start
assert duration < self.grace_period
# Here we truncate to ensure that we don't fail on minute differences like 10.1 instead of 10
assert minimum <= int(duration) <= self.grace_period
def stream_request(self, config, connected, early_terminate):
expected_terminate_message = "502 Bad Gateway"

View File

@ -6,6 +6,8 @@ import (
"net"
"time"
"github.com/pkg/errors"
"github.com/cloudflare/cloudflared/management"
"github.com/cloudflare/cloudflared/tunnelrpc"
tunnelpogs "github.com/cloudflare/cloudflared/tunnelrpc/pogs"
@ -116,27 +118,32 @@ func (c *controlStream) ServeControlStream(
}
}
c.waitForUnregister(ctx, registrationClient)
return nil
return c.waitForUnregister(ctx, registrationClient)
}
func (c *controlStream) waitForUnregister(ctx context.Context, registrationClient tunnelrpc.RegistrationClient) {
func (c *controlStream) waitForUnregister(ctx context.Context, registrationClient tunnelrpc.RegistrationClient) error {
// wait for connection termination or start of graceful shutdown
defer registrationClient.Close()
var shutdownError error
select {
case <-ctx.Done():
shutdownError = ctx.Err()
break
case <-c.gracefulShutdownC:
c.stoppedGracefully = true
}
c.observer.sendUnregisteringEvent(c.connIndex)
registrationClient.GracefulShutdown(ctx, c.gracePeriod)
err := registrationClient.GracefulShutdown(ctx, c.gracePeriod)
if err != nil {
return errors.Wrap(err, "Error shutting down control stream")
}
c.observer.log.Info().
Int(management.EventTypeKey, int(management.Cloudflared)).
Uint8(LogFieldConnIndex, c.connIndex).
IPAddr(LogFieldIPAddress, c.edgeAddress).
Msg("Unregistered tunnel connection")
return shutdownError
}
func (c *controlStream) IsStopped() bool {

View File

@ -192,8 +192,9 @@ func (mc mockNamedTunnelRPCClient) RegisterConnection(
}, nil
}
func (mc mockNamedTunnelRPCClient) GracefulShutdown(ctx context.Context, gracePeriod time.Duration) {
func (mc mockNamedTunnelRPCClient) GracefulShutdown(ctx context.Context, gracePeriod time.Duration) error {
close(mc.unregistered)
return nil
}
func (mockNamedTunnelRPCClient) Close() {}

View File

@ -42,12 +42,26 @@ const (
HTTPMethodKey = "HttpMethod"
// HTTPHostKey is used to get or set http Method in QUIC ALPN if the underlying proxy connection type is HTTP.
HTTPHostKey = "HttpHost"
// HTTPRequestBodyHintKey is used in ConnectRequest metadata to indicate if the request has body
HTTPRequestBodyHintKey = "HttpReqBodyHint"
QUICMetadataFlowID = "FlowID"
// emperically this capacity has been working well
demuxChanCapacity = 16
)
type RequestBodyHint uint64
const (
RequestBodyHintMissing RequestBodyHint = iota
RequestBodyHintEmpty
RequestBodyHintHasData
)
func (rbh RequestBodyHint) String() string {
return [...]string{"missing", "empty", "data"}[rbh]
}
var (
portForConnIndex = make(map[uint8]int, 0)
portMapMutex sync.Mutex
@ -69,6 +83,7 @@ type QUICConnection struct {
rpcTimeout time.Duration
streamWriteTimeout time.Duration
gracePeriod time.Duration
}
// NewQUICConnection returns a new instance of QUICConnection.
@ -86,6 +101,7 @@ func NewQUICConnection(
packetRouterConfig *ingress.GlobalRouterConfig,
rpcTimeout time.Duration,
streamWriteTimeout time.Duration,
gracePeriod time.Duration,
) (*QUICConnection, error) {
udpConn, err := createUDPConnForConnIndex(connIndex, localAddr, logger)
if err != nil {
@ -122,6 +138,7 @@ func NewQUICConnection(
connIndex: connIndex,
rpcTimeout: rpcTimeout,
streamWriteTimeout: streamWriteTimeout,
gracePeriod: gracePeriod,
}, nil
}
@ -144,8 +161,17 @@ func (q *QUICConnection) Serve(ctx context.Context) error {
// In the future, if cloudflared can autonomously push traffic to the edge, we have to make sure the control
// stream is already fully registered before the other goroutines can proceed.
errGroup.Go(func() error {
defer cancel()
return q.serveControlStream(ctx, controlStream)
// err is equal to nil if we exit due to unregistration. If that happens we want to wait the full
// amount of the grace period, allowing requests to finish before we cancel the context, which will
// make cloudflared exit.
if err := q.serveControlStream(ctx, controlStream); err == nil {
select {
case <-ctx.Done():
case <-time.Tick(q.gracePeriod):
}
}
cancel()
return err
})
errGroup.Go(func() error {
defer cancel()
@ -474,7 +500,6 @@ func buildHTTPRequest(
dest := connectRequest.Dest
method := metadata[HTTPMethodKey]
host := metadata[HTTPHostKey]
isWebsocket := connectRequest.Type == pogs.ConnectionTypeWebsocket
req, err := http.NewRequestWithContext(ctx, method, dest, body)
if err != nil {
@ -499,13 +524,8 @@ func buildHTTPRequest(
return nil, fmt.Errorf("Error setting content-length: %w", err)
}
// Go's client defaults to chunked encoding after a 200ms delay if the following cases are true:
// * the request body blocks
// * the content length is not set (or set to -1)
// * the method doesn't usually have a body (GET, HEAD, DELETE, ...)
// * there is no transfer-encoding=chunked already set.
// So, if transfer cannot be chunked and content length is 0, we dont set a request body.
if !isWebsocket && !isTransferEncodingChunked(req) && req.ContentLength == 0 {
if shouldSetRequestBodyToEmpty(connectRequest, metadata, req) {
log.Debug().Str("host", req.Host).Str("method", req.Method).Msg("Set request to have no body")
req.Body = http.NoBody
}
stripWebsocketUpgradeHeader(req)
@ -530,6 +550,35 @@ func isTransferEncodingChunked(req *http.Request) bool {
return strings.Contains(strings.ToLower(transferEncodingVal), "chunked")
}
// Borrowed from https://github.com/golang/go/blob/go1.22.6/src/net/http/request.go#L1541
func requestMethodUsuallyLacksBody(req *http.Request) bool {
switch strings.ToUpper(req.Method) {
case "GET", "HEAD", "DELETE", "OPTIONS", "PROPFIND", "SEARCH":
return true
}
return false
}
func shouldSetRequestBodyToEmpty(connectRequest *pogs.ConnectRequest, metadata map[string]string, req *http.Request) bool {
switch metadata[HTTPRequestBodyHintKey] {
case RequestBodyHintEmpty.String():
return true
case RequestBodyHintHasData.String():
return false
default:
}
isWebsocket := connectRequest.Type == pogs.ConnectionTypeWebsocket
// Go's client defaults to chunked encoding after a 200ms delay if the following cases are true:
// * the request body blocks
// * the content length is not set (or set to -1)
// * the method doesn't usually have a body (GET, HEAD, DELETE, ...)
// * there is no transfer-encoding=chunked already set.
// So, if transfer cannot be chunked and content length is 0, we dont set a request body.
// Reference: https://github.com/golang/go/blob/go1.22.2/src/net/http/transfer.go#L192-L206
return !isWebsocket && requestMethodUsuallyLacksBody(req) && !isTransferEncodingChunked(req) && req.ContentLength == 0
}
// A helper struct that guarantees a call to close only affects read side, but not write side.
type nopCloserReadWriter struct {
io.ReadWriteCloser

View File

@ -484,6 +484,125 @@ func TestBuildHTTPRequest(t *testing.T) {
},
body: io.NopCloser(&bytes.Buffer{}),
},
{
name: "if edge sends the body is empty hint, set body to empty",
connectRequest: &pogs.ConnectRequest{
Dest: "http://test.com",
Metadata: []pogs.Metadata{
{
Key: "HttpHeader:Another-Header",
Val: "Misc",
},
{
Key: "HttpHost",
Val: "cf.host",
},
{
Key: "HttpMethod",
Val: "put",
},
{
Key: HTTPRequestBodyHintKey,
Val: RequestBodyHintEmpty.String(),
},
},
},
req: &http.Request{
Method: "put",
URL: &url.URL{
Scheme: "http",
Host: "test.com",
},
Proto: "HTTP/1.1",
ProtoMajor: 1,
ProtoMinor: 1,
Header: http.Header{
"Another-Header": []string{"Misc"},
},
ContentLength: 0,
Host: "cf.host",
Body: http.NoBody,
},
body: io.NopCloser(&bytes.Buffer{}),
},
{
name: "if edge sends the body has data hint, don't set body to empty",
connectRequest: &pogs.ConnectRequest{
Dest: "http://test.com",
Metadata: []pogs.Metadata{
{
Key: "HttpHeader:Another-Header",
Val: "Misc",
},
{
Key: "HttpHost",
Val: "cf.host",
},
{
Key: "HttpMethod",
Val: "put",
},
{
Key: HTTPRequestBodyHintKey,
Val: RequestBodyHintHasData.String(),
},
},
},
req: &http.Request{
Method: "put",
URL: &url.URL{
Scheme: "http",
Host: "test.com",
},
Proto: "HTTP/1.1",
ProtoMajor: 1,
ProtoMinor: 1,
Header: http.Header{
"Another-Header": []string{"Misc"},
},
ContentLength: 0,
Host: "cf.host",
Body: io.NopCloser(&bytes.Buffer{}),
},
body: io.NopCloser(&bytes.Buffer{}),
},
{
name: "if the http method usually has body, don't set body to empty",
connectRequest: &pogs.ConnectRequest{
Dest: "http://test.com",
Metadata: []pogs.Metadata{
{
Key: "HttpHeader:Another-Header",
Val: "Misc",
},
{
Key: "HttpHost",
Val: "cf.host",
},
{
Key: "HttpMethod",
Val: "post",
},
},
},
req: &http.Request{
Method: "post",
URL: &url.URL{
Scheme: "http",
Host: "test.com",
},
Proto: "HTTP/1.1",
ProtoMajor: 1,
ProtoMinor: 1,
Header: http.Header{
"Another-Header": []string{"Misc"},
},
ContentLength: 0,
Host: "cf.host",
Body: io.NopCloser(&bytes.Buffer{}),
},
body: io.NopCloser(&bytes.Buffer{}),
},
}
log := zerolog.Nop()
@ -736,6 +855,7 @@ func testQUICConnection(udpListenerAddr net.Addr, t *testing.T, index uint8) *QU
nil,
15*time.Second,
0*time.Second,
0*time.Second,
)
require.NoError(t, err)
return qc

View File

@ -11,8 +11,9 @@ import hashlib
import requests
import tarfile
from os import listdir
from os.path import isfile, join
from os.path import isfile, join, splitext
import re
import subprocess
from github import Github, GithubException, UnknownObjectException
@ -210,6 +211,61 @@ def move_asset(filepath, filename):
except shutil.SameFileError:
pass # the macOS release copy fails with being the same file (already in the artifacts directory)
def get_binary_version(binary_path):
"""
Sample output from go version -m <binary>:
...
build -compiler=gc
build -ldflags="-X \"main.Version=2024.8.3-6-gec072691\" -X \"main.BuildTime=2024-09-10-1027 UTC\" "
build CGO_ENABLED=1
...
This function parses the above output to retrieve the following substring 2024.8.3-6-gec072691.
To do this a start and end indexes are computed and the a slice is extracted from the output using them.
"""
needle = "main.Version="
cmd = ['go','version', '-m', binary_path]
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, _ = process.communicate()
version_info = output.decode()
# Find start of needle
needle_index = version_info.find(needle)
# Find backward slash relative to the beggining of the needle
relative_end_index = version_info[needle_index:].find("\\")
# Calculate needle position plus needle length to find version beggining
start_index = needle_index + len(needle)
# Calculate needle position plus relative position of the backward slash
end_index = needle_index + relative_end_index
return version_info[start_index:end_index]
def assert_asset_version(binary_path, release_version):
"""
Asserts that the artifacts have the correct release_version.
The artifacts that are checked must not have an extension expecting .exe and .tgz.
In the occurrence of any other extension the function exits early.
"""
try:
shutil.rmtree('tmp')
except OSError:
pass
_, ext = os.path.splitext(binary_path)
if ext == '.exe' or ext == '':
binary_version = get_binary_version(binary_path)
elif ext == '.tgz':
tar = tarfile.open(binary_path, "r:gz")
tar.extractall("tmp", filter='data')
tar.close()
binary_path = os.path.join(os.getcwd(), 'tmp', 'cloudflared')
binary_version = get_binary_version(binary_path)
else:
return
if binary_version != release_version:
logging.error(f"Version mismatch {binary_path}, binary_version {binary_version} release_version {release_version}")
exit(1)
def main():
""" Attempts to upload Asset to Github Release. Creates Release if it doesn't exist """
try:
@ -221,6 +277,7 @@ def main():
for filename in onlyfiles:
binary_path = os.path.join(args.path, filename)
logging.info("binary: " + binary_path)
assert_asset_version(binary_path, args.release_version)
elif os.path.isfile(args.path):
logging.info("binary: " + binary_path)
else:
@ -233,6 +290,9 @@ def main():
if os.path.isdir(args.path):
onlyfiles = [f for f in listdir(args.path) if isfile(join(args.path, f))]
for filename in onlyfiles:
binary_path = os.path.join(args.path, filename)
assert_asset_version(binary_path, args.release_version)
for filename in onlyfiles:
binary_path = os.path.join(args.path, filename)
upload_asset(release, binary_path, filename, args.release_version, args.kv_account_id, args.namespace_id,

View File

@ -604,6 +604,7 @@ func (e *EdgeTunnelServer) serveQUIC(
e.config.PacketConfig,
e.config.RPCTimeout,
e.config.WriteStreamTimeout,
e.config.GracePeriod,
)
if err != nil {
connLogger.ConnAwareLogger().Err(err).Msgf("Failed to create new quic connection")

View File

@ -23,7 +23,7 @@ type RegistrationClient interface {
edgeAddress net.IP,
) (*pogs.ConnectionDetails, error)
SendLocalConfiguration(ctx context.Context, config []byte) error
GracefulShutdown(ctx context.Context, gracePeriod time.Duration)
GracefulShutdown(ctx context.Context, gracePeriod time.Duration) error
Close()
}
@ -79,7 +79,7 @@ func (r *registrationClient) SendLocalConfiguration(ctx context.Context, config
return err
}
func (r *registrationClient) GracefulShutdown(ctx context.Context, gracePeriod time.Duration) {
func (r *registrationClient) GracefulShutdown(ctx context.Context, gracePeriod time.Duration) error {
ctx, cancel := context.WithTimeout(ctx, gracePeriod)
defer cancel()
defer metrics.CapnpMetrics.ClientOperations.WithLabelValues(metrics.Registration, metrics.OperationUnregisterConnection).Inc()
@ -88,7 +88,9 @@ func (r *registrationClient) GracefulShutdown(ctx context.Context, gracePeriod t
err := r.client.UnregisterConnection(ctx)
if err != nil {
metrics.CapnpMetrics.ClientFailures.WithLabelValues(metrics.Registration, metrics.OperationUnregisterConnection).Inc()
return err
}
return nil
}
func (r *registrationClient) Close() {