TUN-8724: Add CLI command for diagnostic procedure
## Summary Adds a new CLI subcommand, `diag`, under the tunnel command. This command automatically collects several data points (logs, metrics, network information, system information, tunnel state, and runtime information) and writes them to a single zip file. Closes TUN-8724
This commit is contained in:
parent
8ed19222b9
commit
1859d742a8
|
@ -236,6 +236,7 @@ func Commands() []*cli.Command {
|
||||||
buildDeleteCommand(),
|
buildDeleteCommand(),
|
||||||
buildCleanupCommand(),
|
buildCleanupCommand(),
|
||||||
buildTokenCommand(),
|
buildTokenCommand(),
|
||||||
|
buildDiagCommand(),
|
||||||
// for compatibility, allow following as tunnel subcommands
|
// for compatibility, allow following as tunnel subcommands
|
||||||
proxydns.Command(true),
|
proxydns.Command(true),
|
||||||
cliutil.RemovedCommand("db-connect"),
|
cliutil.RemovedCommand("db-connect"),
|
||||||
|
|
|
@ -28,16 +28,26 @@ import (
|
||||||
"github.com/cloudflare/cloudflared/cmd/cloudflared/updater"
|
"github.com/cloudflare/cloudflared/cmd/cloudflared/updater"
|
||||||
"github.com/cloudflare/cloudflared/config"
|
"github.com/cloudflare/cloudflared/config"
|
||||||
"github.com/cloudflare/cloudflared/connection"
|
"github.com/cloudflare/cloudflared/connection"
|
||||||
|
"github.com/cloudflare/cloudflared/diagnostic"
|
||||||
|
"github.com/cloudflare/cloudflared/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
allSortByOptions = "name, id, createdAt, deletedAt, numConnections"
|
allSortByOptions = "name, id, createdAt, deletedAt, numConnections"
|
||||||
connsSortByOptions = "id, startedAt, numConnections, version"
|
connsSortByOptions = "id, startedAt, numConnections, version"
|
||||||
CredFileFlagAlias = "cred-file"
|
CredFileFlagAlias = "cred-file"
|
||||||
CredFileFlag = "credentials-file"
|
CredFileFlag = "credentials-file"
|
||||||
CredContentsFlag = "credentials-contents"
|
CredContentsFlag = "credentials-contents"
|
||||||
TunnelTokenFlag = "token"
|
TunnelTokenFlag = "token"
|
||||||
overwriteDNSFlagName = "overwrite-dns"
|
overwriteDNSFlagName = "overwrite-dns"
|
||||||
|
noDiagLogsFlagName = "no-diag-logs"
|
||||||
|
noDiagMetricsFlagName = "no-diag-metrics"
|
||||||
|
noDiagSystemFlagName = "no-diag-system"
|
||||||
|
noDiagRuntimeFlagName = "no-diag-runtime"
|
||||||
|
noDiagNetworkFlagName = "no-diag-network"
|
||||||
|
diagContainerIDFlagName = "diag-container-id"
|
||||||
|
diagPodFlagName = "diag-pod-id"
|
||||||
|
metricsFlagName = "metrics"
|
||||||
|
|
||||||
LogFieldTunnelID = "tunnelID"
|
LogFieldTunnelID = "tunnelID"
|
||||||
)
|
)
|
||||||
|
@ -179,6 +189,46 @@ var (
|
||||||
Usage: "Source address and the interface name to send/receive ICMPv6 messages. If not provided cloudflared will dial a local address to determine the source IP or fallback to ::.",
|
Usage: "Source address and the interface name to send/receive ICMPv6 messages. If not provided cloudflared will dial a local address to determine the source IP or fallback to ::.",
|
||||||
EnvVars: []string{"TUNNEL_ICMPV6_SRC"},
|
EnvVars: []string{"TUNNEL_ICMPV6_SRC"},
|
||||||
}
|
}
|
||||||
|
metricsFlag = &cli.StringFlag{
|
||||||
|
Name: metricsFlagName,
|
||||||
|
Usage: "The metrics server address i.e.: 127.0.0.1:12345. If your instance is running in a Docker/Kubernetes environment you need to setup port forwarding for your application.",
|
||||||
|
Value: "",
|
||||||
|
}
|
||||||
|
diagContainerFlag = &cli.StringFlag{
|
||||||
|
Name: diagContainerIDFlagName,
|
||||||
|
Usage: "Container ID or Name to collect logs from",
|
||||||
|
Value: "",
|
||||||
|
}
|
||||||
|
diagPodFlag = &cli.StringFlag{
|
||||||
|
Name: diagPodFlagName,
|
||||||
|
Usage: "Kubernetes POD to collect logs from",
|
||||||
|
Value: "",
|
||||||
|
}
|
||||||
|
noDiagLogsFlag = &cli.BoolFlag{
|
||||||
|
Name: noDiagLogsFlagName,
|
||||||
|
Usage: "Log collection will not be performed",
|
||||||
|
Value: false,
|
||||||
|
}
|
||||||
|
noDiagMetricsFlag = &cli.BoolFlag{
|
||||||
|
Name: noDiagMetricsFlagName,
|
||||||
|
Usage: "Metric collection will not be performed",
|
||||||
|
Value: false,
|
||||||
|
}
|
||||||
|
noDiagSystemFlag = &cli.BoolFlag{
|
||||||
|
Name: noDiagSystemFlagName,
|
||||||
|
Usage: "System information collection will not be performed",
|
||||||
|
Value: false,
|
||||||
|
}
|
||||||
|
noDiagRuntimeFlag = &cli.BoolFlag{
|
||||||
|
Name: noDiagRuntimeFlagName,
|
||||||
|
Usage: "Runtime information collection will not be performed",
|
||||||
|
Value: false,
|
||||||
|
}
|
||||||
|
noDiagNetworkFlag = &cli.BoolFlag{
|
||||||
|
Name: noDiagNetworkFlagName,
|
||||||
|
Usage: "Network diagnostics won't be performed",
|
||||||
|
Value: false,
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
func buildCreateCommand() *cli.Command {
|
func buildCreateCommand() *cli.Command {
|
||||||
|
@ -375,7 +425,6 @@ func formatAndPrintTunnelList(tunnels []*cfapi.Tunnel, showRecentlyDisconnected
|
||||||
}
|
}
|
||||||
|
|
||||||
func fmtConnections(connections []cfapi.Connection, showRecentlyDisconnected bool) string {
|
func fmtConnections(connections []cfapi.Connection, showRecentlyDisconnected bool) string {
|
||||||
|
|
||||||
// Count connections per colo
|
// Count connections per colo
|
||||||
numConnsPerColo := make(map[string]uint, len(connections))
|
numConnsPerColo := make(map[string]uint, len(connections))
|
||||||
for _, connection := range connections {
|
for _, connection := range connections {
|
||||||
|
@ -897,8 +946,10 @@ func lbRouteFromArg(c *cli.Context) (cfapi.HostnameRoute, error) {
|
||||||
return cfapi.NewLBRoute(lbName, lbPool), nil
|
return cfapi.NewLBRoute(lbName, lbPool), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var nameRegex = regexp.MustCompile("^[_a-zA-Z0-9][-_.a-zA-Z0-9]*$")
|
var (
|
||||||
var hostNameRegex = regexp.MustCompile("^[*_a-zA-Z0-9][-_.a-zA-Z0-9]*$")
|
nameRegex = regexp.MustCompile("^[_a-zA-Z0-9][-_.a-zA-Z0-9]*$")
|
||||||
|
hostNameRegex = regexp.MustCompile("^[*_a-zA-Z0-9][-_.a-zA-Z0-9]*$")
|
||||||
|
)
|
||||||
|
|
||||||
func validateName(s string, allowWildcardSubdomain bool) bool {
|
func validateName(s string, allowWildcardSubdomain bool) bool {
|
||||||
if allowWildcardSubdomain {
|
if allowWildcardSubdomain {
|
||||||
|
@ -986,3 +1037,78 @@ SUBCOMMAND OPTIONS:
|
||||||
`
|
`
|
||||||
return fmt.Sprintf(template, parentFlagsHelp)
|
return fmt.Sprintf(template, parentFlagsHelp)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// buildDiagCommand returns the cli.Command definition for the "diag"
// subcommand: its name, usage and description text, the flags it accepts, and
// diagCommand (wrapped by cliutil.ConfiguredAction) as its action.
func buildDiagCommand() *cli.Command {
|
||||||
|
return &cli.Command{
|
||||||
|
Name: "diag",
|
||||||
|
Action: cliutil.ConfiguredAction(diagCommand),
|
||||||
|
Usage: "Creates a diagnostic report from a local cloudflared instance",
|
||||||
|
UsageText: "cloudflared tunnel [tunnel command options] diag [subcommand options]",
|
||||||
|
Description: "cloudflared tunnel diag will create a diagnostic report of a local cloudflared instance. The diagnostic procedure collects: logs, metrics, system information, traceroute to Cloudflare Edge, and runtime information. Since there may be multiple instances of cloudflared running the --metrics option may be provided to target a specific instance.",
|
||||||
|
// Target selection flags first (metrics address, container, pod), followed
// by the opt-out toggles for each collection step.
Flags: []cli.Flag{
|
||||||
|
metricsFlag,
|
||||||
|
diagContainerFlag,
|
||||||
|
diagPodFlag,
|
||||||
|
noDiagLogsFlag,
|
||||||
|
noDiagMetricsFlag,
|
||||||
|
noDiagSystemFlag,
|
||||||
|
noDiagRuntimeFlag,
|
||||||
|
noDiagNetworkFlag,
|
||||||
|
},
|
||||||
|
CustomHelpTemplate: commandHelpTemplate(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// diagCommand is the action behind the "diag" subcommand. It builds a
// diagnostic.Options value from the CLI flags, calls diagnostic.RunDiagnostic
// and reports the outcome through the subcommand logger.
//
// The only error it returns is a failure of newSubcommandContext; every error
// coming from RunDiagnostic is turned into a log message and the function
// returns nil.
func diagCommand(ctx *cli.Context) error {
|
||||||
|
sctx, err := newSubcommandContext(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
log := sctx.log
|
||||||
|
options := diagnostic.Options{
|
||||||
|
KnownAddresses: metrics.GetMetricsKnownAddresses(metrics.Runtime),
|
||||||
|
Address: sctx.c.String(metricsFlagName),
|
||||||
|
ContainerID: sctx.c.String(diagContainerIDFlagName),
|
||||||
|
PodID: sctx.c.String(diagPodFlagName),
|
||||||
|
Toggles: diagnostic.Toggles{
|
||||||
|
NoDiagLogs: sctx.c.Bool(noDiagLogsFlagName),
|
||||||
|
NoDiagMetrics: sctx.c.Bool(noDiagMetricsFlagName),
|
||||||
|
NoDiagSystem: sctx.c.Bool(noDiagSystemFlagName),
|
||||||
|
NoDiagRuntime: sctx.c.Bool(noDiagRuntimeFlagName),
|
||||||
|
NoDiagNetwork: sctx.c.Bool(noDiagNetworkFlagName),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// No --metrics address was given: log a hint about port forwarding for
// instances running in Docker/Kubernetes.
if options.Address == "" {
|
||||||
|
log.Info().Msg("If your instance is running in a Docker/Kubernetes environment you need to setup port forwarding for your application.")
|
||||||
|
}
|
||||||
|
|
||||||
|
states, err := diagnostic.RunDiagnostic(log, options)
|
||||||
|
|
||||||
|
// Nothing to diagnose: warn and stop without returning an error.
if errors.Is(err, diagnostic.ErrMetricsServerNotFound) {
|
||||||
|
log.Warn().Msg("No instances found")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
// Several instances were found: list each one (when states is non-nil) so
// the user can pick one with --metrics, then stop.
if errors.Is(err, diagnostic.ErrMultipleMetricsServerFound) {
|
||||||
|
if states != nil {
|
||||||
|
log.Info().Msgf("Found multiple instances running:")
|
||||||
|
for _, state := range states {
|
||||||
|
log.Info().Msgf("Instance: tunnel-id=%s connector-id=%s metrics-address=%s", state.TunnelID, state.ConnectorID, state.URL.String())
|
||||||
|
}
|
||||||
|
log.Info().Msgf("To select one instance use the option --metrics")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// This case does not return: after the hint is logged, execution falls
// through to the generic completion message below.
if errors.Is(err, diagnostic.ErrLogConfigurationIsInvalid) {
|
||||||
|
log.Info().Msg("Couldn't extract logs from the instance. If the instance is running in a containerized environment use the option --diag-container-id or --diag-pod-id. If there is no logging configuration use --no-diag-logs.")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Any remaining error is only reported as a warning; it is not returned.
if err != nil {
|
||||||
|
log.Warn().Msg("Diagnostic completed with one or more errors")
|
||||||
|
} else {
|
||||||
|
log.Info().Msg("Diagnostic completed")
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
|
@ -9,6 +9,7 @@ import (
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -162,17 +163,7 @@ func collectNetworkResultRoutine(
|
||||||
}
|
}
|
||||||
|
|
||||||
hops, raw, err := collector.Collect(ctx, network.NewTraceOptions(hopsNo, timeout, hostname, useIPv4))
|
hops, raw, err := collector.Collect(ctx, network.NewTraceOptions(hopsNo, timeout, hostname, useIPv4))
|
||||||
if err != nil {
|
results <- networkCollectionResult{name, hops, raw, err}
|
||||||
if raw == "" {
|
|
||||||
// An error happened and there is no raw output
|
|
||||||
results <- networkCollectionResult{name, nil, "", err}
|
|
||||||
} else {
|
|
||||||
// An error happened and there is raw output then write to file
|
|
||||||
results <- networkCollectionResult{name, nil, raw, nil}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
results <- networkCollectionResult{name, hops, raw, nil}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func gatherNetworkInformation(ctx context.Context) map[string]networkCollectionResult {
|
func gatherNetworkInformation(ctx context.Context) map[string]networkCollectionResult {
|
||||||
|
@ -209,10 +200,6 @@ func gatherNetworkInformation(ctx context.Context) map[string]networkCollectionR
|
||||||
|
|
||||||
for range len(hostAndIPversionPairs) {
|
for range len(hostAndIPversionPairs) {
|
||||||
result := <-results
|
result := <-results
|
||||||
if result.err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
resultMap[result.name] = result
|
resultMap[result.name] = result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -249,22 +236,30 @@ func rawNetworkInformationWriter(resultMap map[string]networkCollectionResult) (
|
||||||
|
|
||||||
defer networkDumpHandle.Close()
|
defer networkDumpHandle.Close()
|
||||||
|
|
||||||
|
var exitErr error
|
||||||
|
|
||||||
for k, v := range resultMap {
|
for k, v := range resultMap {
|
||||||
_, err := networkDumpHandle.WriteString(k + "\n" + v.raw + "\n")
|
if v.err != nil {
|
||||||
if err != nil {
|
if exitErr == nil {
|
||||||
return "", fmt.Errorf("error writing raw network information: %w", err)
|
exitErr = v.err
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err := networkDumpHandle.WriteString(k + "\nno content\n")
|
||||||
|
if err != nil {
|
||||||
|
return networkDumpHandle.Name(), fmt.Errorf("error writing 'no content' to raw network file: %w", err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
_, err := networkDumpHandle.WriteString(k + "\n" + v.raw + "\n")
|
||||||
|
if err != nil {
|
||||||
|
return networkDumpHandle.Name(), fmt.Errorf("error writing raw network information: %w", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return networkDumpHandle.Name(), nil
|
return networkDumpHandle.Name(), exitErr
|
||||||
}
|
}
|
||||||
|
|
||||||
func jsonNetworkInformationWriter(resultMap map[string]networkCollectionResult) (string, error) {
|
func jsonNetworkInformationWriter(resultMap map[string]networkCollectionResult) (string, error) {
|
||||||
jsonMap := make(map[string][]*network.Hop, len(resultMap))
|
|
||||||
for k, v := range resultMap {
|
|
||||||
jsonMap[k] = v.info
|
|
||||||
}
|
|
||||||
|
|
||||||
networkDumpHandle, err := os.Create(filepath.Join(os.TempDir(), networkBaseName))
|
networkDumpHandle, err := os.Create(filepath.Join(os.TempDir(), networkBaseName))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", ErrCreatingTemporaryFile
|
return "", ErrCreatingTemporaryFile
|
||||||
|
@ -274,12 +269,23 @@ func jsonNetworkInformationWriter(resultMap map[string]networkCollectionResult)
|
||||||
|
|
||||||
encoder := newFormattedEncoder(networkDumpHandle)
|
encoder := newFormattedEncoder(networkDumpHandle)
|
||||||
|
|
||||||
err = encoder.Encode(jsonMap)
|
var exitErr error
|
||||||
if err != nil {
|
|
||||||
return "", fmt.Errorf("error encoding network information results: %w", err)
|
jsonMap := make(map[string][]*network.Hop, len(resultMap))
|
||||||
|
for k, v := range resultMap {
|
||||||
|
jsonMap[k] = v.info
|
||||||
|
|
||||||
|
if exitErr == nil && v.err != nil {
|
||||||
|
exitErr = v.err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return networkDumpHandle.Name(), nil
|
err = encoder.Encode(jsonMap)
|
||||||
|
if err != nil {
|
||||||
|
return networkDumpHandle.Name(), fmt.Errorf("error encoding network information results: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return networkDumpHandle.Name(), exitErr
|
||||||
}
|
}
|
||||||
|
|
||||||
func collectFromEndpointAdapter(collect collectToWriterFunc, fileName string) collectFunc {
|
func collectFromEndpointAdapter(collect collectToWriterFunc, fileName string) collectFunc {
|
||||||
|
@ -292,7 +298,7 @@ func collectFromEndpointAdapter(collect collectToWriterFunc, fileName string) co
|
||||||
|
|
||||||
err = collect(ctx, dumpHandle)
|
err = collect(ctx, dumpHandle)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("error running collector: %w", err)
|
return dumpHandle.Name(), fmt.Errorf("error running collector: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return dumpHandle.Name(), nil
|
return dumpHandle.Name(), nil
|
||||||
|
@ -316,8 +322,11 @@ func tunnelStateCollectEndpointAdapter(client HTTPClient, tunnel *TunnelState, f
|
||||||
encoder := newFormattedEncoder(writer)
|
encoder := newFormattedEncoder(writer)
|
||||||
|
|
||||||
err := encoder.Encode(tunnel)
|
err := encoder.Encode(tunnel)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error encoding tunnel state: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
return fmt.Errorf("error encoding tunnel state: %w", err)
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return collectFromEndpointAdapter(endpointFunc, fileName)
|
return collectFromEndpointAdapter(endpointFunc, fileName)
|
||||||
|
@ -337,15 +346,14 @@ func resolveInstanceBaseURL(
|
||||||
addresses []string,
|
addresses []string,
|
||||||
) (*url.URL, *TunnelState, []*AddressableTunnelState, error) {
|
) (*url.URL, *TunnelState, []*AddressableTunnelState, error) {
|
||||||
if metricsServerAddress != "" {
|
if metricsServerAddress != "" {
|
||||||
|
if !strings.HasPrefix(metricsServerAddress, "http://") {
|
||||||
|
metricsServerAddress = "http://" + metricsServerAddress
|
||||||
|
}
|
||||||
url, err := url.Parse(metricsServerAddress)
|
url, err := url.Parse(metricsServerAddress)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, nil, fmt.Errorf("provided address is not valid: %w", err)
|
return nil, nil, nil, fmt.Errorf("provided address is not valid: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if url.Scheme == "" {
|
|
||||||
url.Scheme = "http://"
|
|
||||||
}
|
|
||||||
|
|
||||||
return url, nil, nil, nil
|
return url, nil, nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -526,9 +534,15 @@ func RunDiagnostic(
|
||||||
jobsReport := runJobs(ctx, jobs, log)
|
jobsReport := runJobs(ctx, jobs, log)
|
||||||
paths := make([]string, 0)
|
paths := make([]string, 0)
|
||||||
|
|
||||||
|
var gerr error
|
||||||
|
|
||||||
for _, v := range jobsReport {
|
for _, v := range jobsReport {
|
||||||
paths = append(paths, v.path)
|
paths = append(paths, v.path)
|
||||||
|
|
||||||
|
if gerr == nil && v.Err != nil {
|
||||||
|
gerr = v.Err
|
||||||
|
}
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
if !errors.Is(v.Err, ErrCreatingTemporaryFile) {
|
if !errors.Is(v.Err, ErrCreatingTemporaryFile) {
|
||||||
os.Remove(v.path)
|
os.Remove(v.path)
|
||||||
|
@ -538,14 +552,10 @@ func RunDiagnostic(
|
||||||
|
|
||||||
zipfile, err := CreateDiagnosticZipFile(zipName, paths)
|
zipfile, err := CreateDiagnosticZipFile(zipName, paths)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if zipfile != "" {
|
|
||||||
os.Remove(zipfile)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Info().Msgf("Diagnostic file written: %v", zipfile)
|
log.Info().Msgf("Diagnostic file written: %v", zipfile)
|
||||||
|
|
||||||
return nil, nil
|
return nil, gerr
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue