From 28796c659e002955b748e2b24e9d4cf6edb60110 Mon Sep 17 00:00:00 2001 From: Luis Neto Date: Fri, 29 Nov 2024 07:43:36 -0800 Subject: [PATCH] TUN-8729: implement network collection for diagnostic procedure ## Summary This PR adds implementation for windows & unix that collect the tracert.exe & traceroute output in the form of hops. Closes TUN-8729 --- diagnostic/error.go | 3 +- diagnostic/network/collector.go | 74 ++++++++++ diagnostic/network/collector_unix.go | 74 ++++++++++ diagnostic/network/collector_unix_test.go | 135 +++++++++++++++++++ diagnostic/network/collector_utils.go | 65 +++++++++ diagnostic/network/collector_windows.go | 71 ++++++++++ diagnostic/network/collector_windows_test.go | 135 +++++++++++++++++++ 7 files changed, 555 insertions(+), 2 deletions(-) create mode 100644 diagnostic/network/collector.go create mode 100644 diagnostic/network/collector_unix.go create mode 100644 diagnostic/network/collector_unix_test.go create mode 100644 diagnostic/network/collector_utils.go create mode 100644 diagnostic/network/collector_windows.go create mode 100644 diagnostic/network/collector_windows_test.go diff --git a/diagnostic/error.go b/diagnostic/error.go index 32cca31d..a7cabec7 100644 --- a/diagnostic/error.go +++ b/diagnostic/error.go @@ -16,6 +16,5 @@ var ( // Error used when given key is not found while parsing KV. ErrKeyNotFound = errors.New("key not found") // Error used when there is no disk volume information available. - ErrNoVolumeFound = errors.New("no disk volume information found") - ErrNoPathAvailable = errors.New("no path available") + ErrNoVolumeFound = errors.New("no disk volume information found") ) diff --git a/diagnostic/network/collector.go b/diagnostic/network/collector.go new file mode 100644 index 00000000..5b742060 --- /dev/null +++ b/diagnostic/network/collector.go @@ -0,0 +1,74 @@ +package diagnostic + +import ( + "context" + "time" +) + +const MicrosecondsFactor = 1000.0 + +// For now only support ICMP is provided. +type IPVersion int + +const ( + V4 IPVersion = iota + V6 IPVersion = iota +) + +type Hop struct { + Hop uint8 `json:"hop,omitempty"` // hop number along the route + Domain string `json:"domain,omitempty"` // domain and/or ip of the hop, this field will be '*' if the hop is a timeout + Rtts []time.Duration `json:"rtts,omitempty"` // RTT measurements in microseconds +} + +type TraceOptions struct { + ttl uint64 // number of hops to perform + timeout time.Duration // wait timeout for each response + address string // address to trace + useV4 bool +} + +func NewTimeoutHop( + hop uint8, +) *Hop { + // Whenever there is a hop in the format of 'N * * *' + // it means that the hop in the path didn't answer to + // any probe. + return NewHop( + hop, + "*", + nil, + ) +} + +func NewHop(hop uint8, domain string, rtts []time.Duration) *Hop { + return &Hop{ + hop, + domain, + rtts, + } +} + +func NewTraceOptions( + ttl uint64, + timeout time.Duration, + address string, + useV4 bool, +) TraceOptions { + return TraceOptions{ + ttl, + timeout, + address, + useV4, + } +} + +type NetworkCollector interface { + // Performs a trace route operation with the specified options. + // In case the trace fails, it will return a non-nil error and + // it may return a string which represents the raw information + // obtained. + // In case it is successful it will only return an array of Hops + // an empty string and a nil error. + Collect(ctx context.Context, options TraceOptions) ([]*Hop, string, error) +} diff --git a/diagnostic/network/collector_unix.go b/diagnostic/network/collector_unix.go new file mode 100644 index 00000000..60cfdf89 --- /dev/null +++ b/diagnostic/network/collector_unix.go @@ -0,0 +1,74 @@ +//go:build darwin || linux + +package diagnostic + +import ( + "context" + "fmt" + "os/exec" + "strconv" + "strings" + "time" +) + +type NetworkCollectorImpl struct{} + +func (tracer *NetworkCollectorImpl) Collect(ctx context.Context, options TraceOptions) ([]*Hop, string, error) { + args := []string{ + "-I", + "-w", + strconv.FormatInt(int64(options.timeout.Seconds()), 10), + "-m", + strconv.FormatUint(options.ttl, 10), + options.address, + } + + var command string + + switch options.useV4 { + case false: + command = "traceroute6" + default: + command = "traceroute" + } + + process := exec.CommandContext(ctx, command, args...) + + return decodeNetworkOutputToFile(process, DecodeLine) +} + +func DecodeLine(text string) (*Hop, error) { + fields := strings.Fields(text) + parts := []string{} + filter := func(s string) bool { return s != "*" && s != "ms" } + + for _, field := range fields { + if filter(field) { + parts = append(parts, field) + } + } + + index, err := strconv.ParseUint(parts[0], 10, 8) + if err != nil { + return nil, fmt.Errorf("couldn't parse index from timeout hop: %w", err) + } + + if len(parts) == 1 { + return NewTimeoutHop(uint8(index)), nil + } + + domain := "" + rtts := []time.Duration{} + + for _, part := range parts[1:] { + rtt, err := strconv.ParseFloat(part, 64) + if err != nil { + domain += part + " " + } else { + rtts = append(rtts, time.Duration(rtt*MicrosecondsFactor)) + } + } + domain, _ = strings.CutSuffix(domain, " ") + + return NewHop(uint8(index), domain, rtts), nil +} diff --git a/diagnostic/network/collector_unix_test.go b/diagnostic/network/collector_unix_test.go new file mode 100644 index 00000000..73b7d7dd --- /dev/null +++ b/diagnostic/network/collector_unix_test.go @@ -0,0 +1,135 @@ +//go:build darwin || linux + +package diagnostic_test + +import ( + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + diagnostic "github.com/cloudflare/cloudflared/diagnostic/network" +) + +func TestDecode(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + text string + expectedHops []*diagnostic.Hop + expectErr bool + }{ + { + "repeated hop index parse failure", + `1 172.68.101.121 (172.68.101.121) 12.874 ms 15.517 ms 15.311 ms +2 172.68.101.121 (172.68.101.121) 12.874 ms 15.517 ms 15.311 ms +someletters * * *`, + nil, + true, + }, + { + "hop index parse failure", + `1 172.68.101.121 (172.68.101.121) 12.874 ms 15.517 ms 15.311 ms +2 172.68.101.121 (172.68.101.121) 12.874 ms 15.517 ms 15.311 ms +someletters 8.8.8.8 8.8.8.9 abc ms 0.456 ms 0.789 ms`, + nil, + true, + }, + { + "missing rtt", + `1 172.68.101.121 (172.68.101.121) 12.874 ms 15.517 ms 15.311 ms +2 * 8.8.8.8 8.8.8.9 0.456 ms 0.789 ms`, + []*diagnostic.Hop{ + diagnostic.NewHop( + uint8(1), + "172.68.101.121 (172.68.101.121)", + []time.Duration{ + time.Duration(12874), + time.Duration(15517), + time.Duration(15311), + }, + ), + diagnostic.NewHop( + uint8(2), + "8.8.8.8 8.8.8.9", + []time.Duration{ + time.Duration(456), + time.Duration(789), + }, + ), + }, + false, + }, + { + "simple example ipv4", + `1 172.68.101.121 (172.68.101.121) 12.874 ms 15.517 ms 15.311 ms +2 172.68.101.121 (172.68.101.121) 12.874 ms 15.517 ms 15.311 ms +3 * * *`, + []*diagnostic.Hop{ + diagnostic.NewHop( + uint8(1), + "172.68.101.121 (172.68.101.121)", + []time.Duration{ + time.Duration(12874), + time.Duration(15517), + time.Duration(15311), + }, + ), + diagnostic.NewHop( + uint8(2), + "172.68.101.121 (172.68.101.121)", + []time.Duration{ + time.Duration(12874), + time.Duration(15517), + time.Duration(15311), + }, + ), + diagnostic.NewTimeoutHop(uint8(3)), + }, + false, + }, + { + "simple example ipv6", + ` 1 2400:cb00:107:1024::ac44:6550 12.780 ms 9.118 ms 10.046 ms + 2 2a09:bac1:: 9.945 ms 10.033 ms 11.562 ms`, + []*diagnostic.Hop{ + diagnostic.NewHop( + uint8(1), + "2400:cb00:107:1024::ac44:6550", + []time.Duration{ + time.Duration(12780), + time.Duration(9118), + time.Duration(10046), + }, + ), + diagnostic.NewHop( + uint8(2), + "2a09:bac1::", + []time.Duration{ + time.Duration(9945), + time.Duration(10033), + time.Duration(11562), + }, + ), + }, + false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + hops, err := diagnostic.Decode(strings.NewReader(test.text), diagnostic.DecodeLine) + if test.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, test.expectedHops, hops) + } + }) + } +} diff --git a/diagnostic/network/collector_utils.go b/diagnostic/network/collector_utils.go new file mode 100644 index 00000000..4505dd91 --- /dev/null +++ b/diagnostic/network/collector_utils.go @@ -0,0 +1,65 @@ +package diagnostic + +import ( + "bufio" + "bytes" + "fmt" + "io" + "os/exec" +) + +type DecodeLineFunc func(text string) (*Hop, error) + +func decodeNetworkOutputToFile(command *exec.Cmd, fn DecodeLineFunc) ([]*Hop, string, error) { + stdout, err := command.StdoutPipe() + if err != nil { + return nil, "", fmt.Errorf("error piping traceroute's output: %w", err) + } + + if err := command.Start(); err != nil { + return nil, "", fmt.Errorf("error starting traceroute: %w", err) + } + + // Tee the output to a string to have the raw information + // in case the decode call fails + // This error is handled only after the Wait call below returns + // otherwise the process can become a zombie + buf := bytes.NewBuffer([]byte{}) + tee := io.TeeReader(stdout, buf) + hops, err := Decode(tee, fn) + + if werr := command.Wait(); werr != nil { + return nil, "", fmt.Errorf("error finishing traceroute: %w", werr) + } + + if err != nil { + // consume all output to have available in buf + io.ReadAll(tee) + // This is already a TracerouteError no need to wrap it + return nil, buf.String(), err + } + + return hops, "", nil +} + +func Decode(reader io.Reader, fn DecodeLineFunc) ([]*Hop, error) { + scanner := bufio.NewScanner(reader) + scanner.Split(bufio.ScanLines) + + var hops []*Hop + for scanner.Scan() { + text := scanner.Text() + hop, err := fn(text) + if err != nil { + return nil, fmt.Errorf("error decoding output line: %w", err) + } + + hops = append(hops, hop) + } + + if scanner.Err() != nil { + return nil, fmt.Errorf("scanner reported an error: %w", scanner.Err()) + } + + return hops, nil +} diff --git a/diagnostic/network/collector_windows.go b/diagnostic/network/collector_windows.go new file mode 100644 index 00000000..d590ac53 --- /dev/null +++ b/diagnostic/network/collector_windows.go @@ -0,0 +1,71 @@ +//go:build windows + +package diagnostic + +import ( + "context" + "fmt" + "os/exec" + "strconv" + "strings" + "time" +) + +type NetworkCollectorImpl struct{} + +func (tracer *NetworkCollectorImpl) Collect(ctx context.Context, options TraceOptions) ([]*Hop, string, error) { + args := []string{ + "-w", + strconv.FormatInt(int64(options.timeout.Seconds()), 10), + "-h", + strconv.FormatUint(options.ttl, 10), + // Do not resolve host names (can add 30+ seconds to run time) + "-d", + options.address, + } + if options.useV4 { + args = append(args, "-4") + } else { + args = append(args, "-6") + } + command := exec.CommandContext(ctx, "tracert.exe", args...) + + return decodeNetworkOutputToFile(command, DecodeLine) +} + +func DecodeLine(text string) (*Hop, error) { + fields := strings.Fields(text) + parts := []string{} + filter := func(s string) bool { return s != "*" && s != "ms" } + + for _, field := range fields { + if filter(field) { + parts = append(parts, field) + } + } + + index, err := strconv.ParseUint(parts[0], 10, 8) + if err != nil { + return nil, fmt.Errorf("couldn't parse index from timeout hop: %w", err) + } + + if len(parts) == 1 { + return NewTimeoutHop(uint8(index)), nil + } + + domain := "" + rtts := []time.Duration{} + + for _, part := range parts[1:] { + + rtt, err := strconv.ParseFloat(strings.TrimLeft(part, "<"), 64) + + if err != nil { + domain += part + " " + } else { + rtts = append(rtts, time.Duration(rtt*MicrosecondsFactor)) + } + } + domain, _ = strings.CutSuffix(domain, " ") + return NewHop(uint8(index), domain, rtts), nil +} diff --git a/diagnostic/network/collector_windows_test.go b/diagnostic/network/collector_windows_test.go new file mode 100644 index 00000000..c591d2cb --- /dev/null +++ b/diagnostic/network/collector_windows_test.go @@ -0,0 +1,135 @@ +//go:build windows + +package diagnostic_test + +import ( + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + diagnostic "github.com/cloudflare/cloudflared/diagnostic/network" +) + +func TestDecode(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + text string + expectedHops []*diagnostic.Hop + expectErr bool + }{ + { + "repeated hop index parse failure", + `1 12.874 ms 15.517 ms 15.311 ms 172.68.101.121 (172.68.101.121) +2 12.874 ms 15.517 ms 15.311 ms 172.68.101.121 (172.68.101.121) +someletters * * *`, + nil, + true, + }, + { + "hop index parse failure", + `1 12.874 ms 15.517 ms 15.311 ms 172.68.101.121 (172.68.101.121) +2 12.874 ms 15.517 ms 15.311 ms 172.68.101.121 (172.68.101.121) +someletters abc ms 0.456 ms 0.789 ms 8.8.8.8 8.8.8.9`, + nil, + true, + }, + { + "missing rtt", + `1 <12.874 ms <15.517 ms <15.311 ms 172.68.101.121 (172.68.101.121) +2 * 0.456 ms 0.789 ms 8.8.8.8 8.8.8.9`, + []*diagnostic.Hop{ + diagnostic.NewHop( + uint8(1), + "172.68.101.121 (172.68.101.121)", + []time.Duration{ + time.Duration(12874), + time.Duration(15517), + time.Duration(15311), + }, + ), + diagnostic.NewHop( + uint8(2), + "8.8.8.8 8.8.8.9", + []time.Duration{ + time.Duration(456), + time.Duration(789), + }, + ), + }, + false, + }, + { + "simple example ipv4", + `1 12.874 ms 15.517 ms 15.311 ms 172.68.101.121 (172.68.101.121) +2 12.874 ms 15.517 ms 15.311 ms 172.68.101.121 (172.68.101.121) +3 * * *`, + []*diagnostic.Hop{ + diagnostic.NewHop( + uint8(1), + "172.68.101.121 (172.68.101.121)", + []time.Duration{ + time.Duration(12874), + time.Duration(15517), + time.Duration(15311), + }, + ), + diagnostic.NewHop( + uint8(2), + "172.68.101.121 (172.68.101.121)", + []time.Duration{ + time.Duration(12874), + time.Duration(15517), + time.Duration(15311), + }, + ), + diagnostic.NewTimeoutHop(uint8(3)), + }, + false, + }, + { + "simple example ipv6", + ` 1 12.780 ms 9.118 ms 10.046 ms 2400:cb00:107:1024::ac44:6550 + 2 9.945 ms 10.033 ms 11.562 ms 2a09:bac1::`, + []*diagnostic.Hop{ + diagnostic.NewHop( + uint8(1), + "2400:cb00:107:1024::ac44:6550", + []time.Duration{ + time.Duration(12780), + time.Duration(9118), + time.Duration(10046), + }, + ), + diagnostic.NewHop( + uint8(2), + "2a09:bac1::", + []time.Duration{ + time.Duration(9945), + time.Duration(10033), + time.Duration(11562), + }, + ), + }, + false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + hops, err := diagnostic.Decode(strings.NewReader(test.text), diagnostic.DecodeLine) + if test.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, test.expectedHops, hops) + } + }) + } +}