TUN-8729: implement network collection for diagnostic procedure

## Summary
This PR adds implementation for windows & unix that collect the tracert.exe & traceroute output in the form of hops.

Closes TUN-8729
This commit is contained in:
Luis Neto 2024-11-29 07:43:36 -08:00
parent 9da15b5d96
commit 28796c659e
7 changed files with 555 additions and 2 deletions

View File

@ -17,5 +17,4 @@ var (
ErrKeyNotFound = errors.New("key not found") ErrKeyNotFound = errors.New("key not found")
// Error used when there is no disk volume information available. // Error used when there is no disk volume information available.
ErrNoVolumeFound = errors.New("no disk volume information found") ErrNoVolumeFound = errors.New("no disk volume information found")
ErrNoPathAvailable = errors.New("no path available")
) )

View File

@ -0,0 +1,74 @@
package diagnostic
import (
"context"
"time"
)
const MicrosecondsFactor = 1000.0
// For now only support ICMP is provided.
type IPVersion int
const (
V4 IPVersion = iota
V6 IPVersion = iota
)
type Hop struct {
Hop uint8 `json:"hop,omitempty"` // hop number along the route
Domain string `json:"domain,omitempty"` // domain and/or ip of the hop, this field will be '*' if the hop is a timeout
Rtts []time.Duration `json:"rtts,omitempty"` // RTT measurements in microseconds
}
type TraceOptions struct {
ttl uint64 // number of hops to perform
timeout time.Duration // wait timeout for each response
address string // address to trace
useV4 bool
}
func NewTimeoutHop(
hop uint8,
) *Hop {
// Whenever there is a hop in the format of 'N * * *'
// it means that the hop in the path didn't answer to
// any probe.
return NewHop(
hop,
"*",
nil,
)
}
func NewHop(hop uint8, domain string, rtts []time.Duration) *Hop {
return &Hop{
hop,
domain,
rtts,
}
}
func NewTraceOptions(
ttl uint64,
timeout time.Duration,
address string,
useV4 bool,
) TraceOptions {
return TraceOptions{
ttl,
timeout,
address,
useV4,
}
}
type NetworkCollector interface {
// Performs a trace route operation with the specified options.
// In case the trace fails, it will return a non-nil error and
// it may return a string which represents the raw information
// obtained.
// In case it is successful it will only return an array of Hops
// an empty string and a nil error.
Collect(ctx context.Context, options TraceOptions) ([]*Hop, string, error)
}

View File

@ -0,0 +1,74 @@
//go:build darwin || linux
package diagnostic
import (
"context"
"fmt"
"os/exec"
"strconv"
"strings"
"time"
)
type NetworkCollectorImpl struct{}
func (tracer *NetworkCollectorImpl) Collect(ctx context.Context, options TraceOptions) ([]*Hop, string, error) {
args := []string{
"-I",
"-w",
strconv.FormatInt(int64(options.timeout.Seconds()), 10),
"-m",
strconv.FormatUint(options.ttl, 10),
options.address,
}
var command string
switch options.useV4 {
case false:
command = "traceroute6"
default:
command = "traceroute"
}
process := exec.CommandContext(ctx, command, args...)
return decodeNetworkOutputToFile(process, DecodeLine)
}
func DecodeLine(text string) (*Hop, error) {
fields := strings.Fields(text)
parts := []string{}
filter := func(s string) bool { return s != "*" && s != "ms" }
for _, field := range fields {
if filter(field) {
parts = append(parts, field)
}
}
index, err := strconv.ParseUint(parts[0], 10, 8)
if err != nil {
return nil, fmt.Errorf("couldn't parse index from timeout hop: %w", err)
}
if len(parts) == 1 {
return NewTimeoutHop(uint8(index)), nil
}
domain := ""
rtts := []time.Duration{}
for _, part := range parts[1:] {
rtt, err := strconv.ParseFloat(part, 64)
if err != nil {
domain += part + " "
} else {
rtts = append(rtts, time.Duration(rtt*MicrosecondsFactor))
}
}
domain, _ = strings.CutSuffix(domain, " ")
return NewHop(uint8(index), domain, rtts), nil
}

View File

@ -0,0 +1,135 @@
//go:build darwin || linux
package diagnostic_test
import (
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
diagnostic "github.com/cloudflare/cloudflared/diagnostic/network"
)
func TestDecode(t *testing.T) {
t.Parallel()
tests := []struct {
name string
text string
expectedHops []*diagnostic.Hop
expectErr bool
}{
{
"repeated hop index parse failure",
`1 172.68.101.121 (172.68.101.121) 12.874 ms 15.517 ms 15.311 ms
2 172.68.101.121 (172.68.101.121) 12.874 ms 15.517 ms 15.311 ms
someletters * * *`,
nil,
true,
},
{
"hop index parse failure",
`1 172.68.101.121 (172.68.101.121) 12.874 ms 15.517 ms 15.311 ms
2 172.68.101.121 (172.68.101.121) 12.874 ms 15.517 ms 15.311 ms
someletters 8.8.8.8 8.8.8.9 abc ms 0.456 ms 0.789 ms`,
nil,
true,
},
{
"missing rtt",
`1 172.68.101.121 (172.68.101.121) 12.874 ms 15.517 ms 15.311 ms
2 * 8.8.8.8 8.8.8.9 0.456 ms 0.789 ms`,
[]*diagnostic.Hop{
diagnostic.NewHop(
uint8(1),
"172.68.101.121 (172.68.101.121)",
[]time.Duration{
time.Duration(12874),
time.Duration(15517),
time.Duration(15311),
},
),
diagnostic.NewHop(
uint8(2),
"8.8.8.8 8.8.8.9",
[]time.Duration{
time.Duration(456),
time.Duration(789),
},
),
},
false,
},
{
"simple example ipv4",
`1 172.68.101.121 (172.68.101.121) 12.874 ms 15.517 ms 15.311 ms
2 172.68.101.121 (172.68.101.121) 12.874 ms 15.517 ms 15.311 ms
3 * * *`,
[]*diagnostic.Hop{
diagnostic.NewHop(
uint8(1),
"172.68.101.121 (172.68.101.121)",
[]time.Duration{
time.Duration(12874),
time.Duration(15517),
time.Duration(15311),
},
),
diagnostic.NewHop(
uint8(2),
"172.68.101.121 (172.68.101.121)",
[]time.Duration{
time.Duration(12874),
time.Duration(15517),
time.Duration(15311),
},
),
diagnostic.NewTimeoutHop(uint8(3)),
},
false,
},
{
"simple example ipv6",
` 1 2400:cb00:107:1024::ac44:6550 12.780 ms 9.118 ms 10.046 ms
2 2a09:bac1:: 9.945 ms 10.033 ms 11.562 ms`,
[]*diagnostic.Hop{
diagnostic.NewHop(
uint8(1),
"2400:cb00:107:1024::ac44:6550",
[]time.Duration{
time.Duration(12780),
time.Duration(9118),
time.Duration(10046),
},
),
diagnostic.NewHop(
uint8(2),
"2a09:bac1::",
[]time.Duration{
time.Duration(9945),
time.Duration(10033),
time.Duration(11562),
},
),
},
false,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
t.Parallel()
hops, err := diagnostic.Decode(strings.NewReader(test.text), diagnostic.DecodeLine)
if test.expectErr {
require.Error(t, err)
} else {
require.NoError(t, err)
assert.Equal(t, test.expectedHops, hops)
}
})
}
}

View File

@ -0,0 +1,65 @@
package diagnostic
import (
"bufio"
"bytes"
"fmt"
"io"
"os/exec"
)
type DecodeLineFunc func(text string) (*Hop, error)
func decodeNetworkOutputToFile(command *exec.Cmd, fn DecodeLineFunc) ([]*Hop, string, error) {
stdout, err := command.StdoutPipe()
if err != nil {
return nil, "", fmt.Errorf("error piping traceroute's output: %w", err)
}
if err := command.Start(); err != nil {
return nil, "", fmt.Errorf("error starting traceroute: %w", err)
}
// Tee the output to a string to have the raw information
// in case the decode call fails
// This error is handled only after the Wait call below returns
// otherwise the process can become a zombie
buf := bytes.NewBuffer([]byte{})
tee := io.TeeReader(stdout, buf)
hops, err := Decode(tee, fn)
if werr := command.Wait(); werr != nil {
return nil, "", fmt.Errorf("error finishing traceroute: %w", werr)
}
if err != nil {
// consume all output to have available in buf
io.ReadAll(tee)
// This is already a TracerouteError no need to wrap it
return nil, buf.String(), err
}
return hops, "", nil
}
func Decode(reader io.Reader, fn DecodeLineFunc) ([]*Hop, error) {
scanner := bufio.NewScanner(reader)
scanner.Split(bufio.ScanLines)
var hops []*Hop
for scanner.Scan() {
text := scanner.Text()
hop, err := fn(text)
if err != nil {
return nil, fmt.Errorf("error decoding output line: %w", err)
}
hops = append(hops, hop)
}
if scanner.Err() != nil {
return nil, fmt.Errorf("scanner reported an error: %w", scanner.Err())
}
return hops, nil
}

View File

@ -0,0 +1,71 @@
//go:build windows
package diagnostic
import (
"context"
"fmt"
"os/exec"
"strconv"
"strings"
"time"
)
type NetworkCollectorImpl struct{}
func (tracer *NetworkCollectorImpl) Collect(ctx context.Context, options TraceOptions) ([]*Hop, string, error) {
args := []string{
"-w",
strconv.FormatInt(int64(options.timeout.Seconds()), 10),
"-h",
strconv.FormatUint(options.ttl, 10),
// Do not resolve host names (can add 30+ seconds to run time)
"-d",
options.address,
}
if options.useV4 {
args = append(args, "-4")
} else {
args = append(args, "-6")
}
command := exec.CommandContext(ctx, "tracert.exe", args...)
return decodeNetworkOutputToFile(command, DecodeLine)
}
func DecodeLine(text string) (*Hop, error) {
fields := strings.Fields(text)
parts := []string{}
filter := func(s string) bool { return s != "*" && s != "ms" }
for _, field := range fields {
if filter(field) {
parts = append(parts, field)
}
}
index, err := strconv.ParseUint(parts[0], 10, 8)
if err != nil {
return nil, fmt.Errorf("couldn't parse index from timeout hop: %w", err)
}
if len(parts) == 1 {
return NewTimeoutHop(uint8(index)), nil
}
domain := ""
rtts := []time.Duration{}
for _, part := range parts[1:] {
rtt, err := strconv.ParseFloat(strings.TrimLeft(part, "<"), 64)
if err != nil {
domain += part + " "
} else {
rtts = append(rtts, time.Duration(rtt*MicrosecondsFactor))
}
}
domain, _ = strings.CutSuffix(domain, " ")
return NewHop(uint8(index), domain, rtts), nil
}

View File

@ -0,0 +1,135 @@
//go:build windows
package diagnostic_test
import (
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
diagnostic "github.com/cloudflare/cloudflared/diagnostic/network"
)
func TestDecode(t *testing.T) {
t.Parallel()
tests := []struct {
name string
text string
expectedHops []*diagnostic.Hop
expectErr bool
}{
{
"repeated hop index parse failure",
`1 12.874 ms 15.517 ms 15.311 ms 172.68.101.121 (172.68.101.121)
2 12.874 ms 15.517 ms 15.311 ms 172.68.101.121 (172.68.101.121)
someletters * * *`,
nil,
true,
},
{
"hop index parse failure",
`1 12.874 ms 15.517 ms 15.311 ms 172.68.101.121 (172.68.101.121)
2 12.874 ms 15.517 ms 15.311 ms 172.68.101.121 (172.68.101.121)
someletters abc ms 0.456 ms 0.789 ms 8.8.8.8 8.8.8.9`,
nil,
true,
},
{
"missing rtt",
`1 <12.874 ms <15.517 ms <15.311 ms 172.68.101.121 (172.68.101.121)
2 * 0.456 ms 0.789 ms 8.8.8.8 8.8.8.9`,
[]*diagnostic.Hop{
diagnostic.NewHop(
uint8(1),
"172.68.101.121 (172.68.101.121)",
[]time.Duration{
time.Duration(12874),
time.Duration(15517),
time.Duration(15311),
},
),
diagnostic.NewHop(
uint8(2),
"8.8.8.8 8.8.8.9",
[]time.Duration{
time.Duration(456),
time.Duration(789),
},
),
},
false,
},
{
"simple example ipv4",
`1 12.874 ms 15.517 ms 15.311 ms 172.68.101.121 (172.68.101.121)
2 12.874 ms 15.517 ms 15.311 ms 172.68.101.121 (172.68.101.121)
3 * * *`,
[]*diagnostic.Hop{
diagnostic.NewHop(
uint8(1),
"172.68.101.121 (172.68.101.121)",
[]time.Duration{
time.Duration(12874),
time.Duration(15517),
time.Duration(15311),
},
),
diagnostic.NewHop(
uint8(2),
"172.68.101.121 (172.68.101.121)",
[]time.Duration{
time.Duration(12874),
time.Duration(15517),
time.Duration(15311),
},
),
diagnostic.NewTimeoutHop(uint8(3)),
},
false,
},
{
"simple example ipv6",
` 1 12.780 ms 9.118 ms 10.046 ms 2400:cb00:107:1024::ac44:6550
2 9.945 ms 10.033 ms 11.562 ms 2a09:bac1::`,
[]*diagnostic.Hop{
diagnostic.NewHop(
uint8(1),
"2400:cb00:107:1024::ac44:6550",
[]time.Duration{
time.Duration(12780),
time.Duration(9118),
time.Duration(10046),
},
),
diagnostic.NewHop(
uint8(2),
"2a09:bac1::",
[]time.Duration{
time.Duration(9945),
time.Duration(10033),
time.Duration(11562),
},
),
},
false,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
t.Parallel()
hops, err := diagnostic.Decode(strings.NewReader(test.text), diagnostic.DecodeLine)
if test.expectErr {
require.Error(t, err)
} else {
require.NoError(t, err)
assert.Equal(t, test.expectedHops, hops)
}
})
}
}