From aab53642525c9cb8eb8f4b6e0a9c65e9631b9657 Mon Sep 17 00:00:00 2001 From: Luis Neto Date: Fri, 22 Nov 2024 08:10:05 -0800 Subject: [PATCH] TUN-8731: Implement diag/system endpoint ## Summary This PR will add a new endpoint, "diag/system" to the metrics server that collects system information from different operating systems. Closes TUN-8731 --- cmd/cloudflared/tunnel/cmd.go | 3 + diagnostic/consts.go | 9 + diagnostic/error.go | 16 + diagnostic/handlers.go | 83 +++++ diagnostic/handlers_test.go | 108 ++++++ diagnostic/system_collector.go | 70 ++++ diagnostic/system_collector_linux.go | 120 +++++++ diagnostic/system_collector_macos.go | 132 +++++++ diagnostic/system_collector_test.go | 466 +++++++++++++++++++++++++ diagnostic/system_collector_utils.go | 377 ++++++++++++++++++++ diagnostic/system_collector_windows.go | 153 ++++++++ metrics/metrics.go | 5 + 12 files changed, 1542 insertions(+) create mode 100644 diagnostic/consts.go create mode 100644 diagnostic/error.go create mode 100644 diagnostic/handlers.go create mode 100644 diagnostic/handlers_test.go create mode 100644 diagnostic/system_collector.go create mode 100644 diagnostic/system_collector_linux.go create mode 100644 diagnostic/system_collector_macos.go create mode 100644 diagnostic/system_collector_test.go create mode 100644 diagnostic/system_collector_utils.go create mode 100644 diagnostic/system_collector_windows.go diff --git a/cmd/cloudflared/tunnel/cmd.go b/cmd/cloudflared/tunnel/cmd.go index c900edb6..651bcb8d 100644 --- a/cmd/cloudflared/tunnel/cmd.go +++ b/cmd/cloudflared/tunnel/cmd.go @@ -28,6 +28,7 @@ import ( "github.com/cloudflare/cloudflared/config" "github.com/cloudflare/cloudflared/connection" "github.com/cloudflare/cloudflared/credentials" + "github.com/cloudflare/cloudflared/diagnostic" "github.com/cloudflare/cloudflared/edgediscovery" "github.com/cloudflare/cloudflared/features" "github.com/cloudflare/cloudflared/ingress" @@ -463,8 +464,10 @@ func StartServer( readinessServer := metrics.NewReadyServer(clientID, tunnelstate.NewConnTracker(log)) observer.RegisterSink(readinessServer) + diagnosticHandler := diagnostic.NewDiagnosticHandler(log, 0, diagnostic.NewSystemCollectorImpl(buildInfo.CloudflaredVersion)) metricsConfig := metrics.Config{ ReadyServer: readinessServer, + DiagnosticHandler: diagnosticHandler, QuickTunnelHostname: quickTunnelURL, Orchestrator: orchestrator, } diff --git a/diagnostic/consts.go b/diagnostic/consts.go new file mode 100644 index 00000000..8081cca8 --- /dev/null +++ b/diagnostic/consts.go @@ -0,0 +1,9 @@ +package diagnostic + +import "time" + +const ( + defaultCollectorTimeout = time.Second * 10 // This const define the timeout value of a collector operation. + collectorField = "collector" // used for logging purposes + systemCollectorName = "system" // used for logging purposes +) diff --git a/diagnostic/error.go b/diagnostic/error.go new file mode 100644 index 00000000..88884a48 --- /dev/null +++ b/diagnostic/error.go @@ -0,0 +1,16 @@ +package diagnostic + +import ( + "errors" +) + +var ( + // Error used when parsing the fields of the output of collector. + ErrInsufficientLines = errors.New("insufficient lines") + // Error used when parsing the lines of the output of collector. + ErrInsuficientFields = errors.New("insufficient fields") + // Error used when given key is not found while parsing KV. + ErrKeyNotFound = errors.New("key not found") + // Error used when tehre is no disk volume information available + ErrNoVolumeFound = errors.New("No disk volume information found") +) diff --git a/diagnostic/handlers.go b/diagnostic/handlers.go new file mode 100644 index 00000000..c9865795 --- /dev/null +++ b/diagnostic/handlers.go @@ -0,0 +1,83 @@ +package diagnostic + +import ( + "context" + "encoding/json" + "net/http" + "time" + + "github.com/rs/zerolog" +) + +type Handler struct { + log *zerolog.Logger + timeout time.Duration + systemCollector SystemCollector +} + +func NewDiagnosticHandler( + log *zerolog.Logger, + timeout time.Duration, + systemCollector SystemCollector, +) *Handler { + if timeout == 0 { + timeout = defaultCollectorTimeout + } + + return &Handler{ + log, + timeout, + systemCollector, + } +} + +func (handler *Handler) SystemHandler(writer http.ResponseWriter, request *http.Request) { + logger := handler.log.With().Str(collectorField, systemCollectorName).Logger() + logger.Info().Msg("Collection started") + + defer func() { + logger.Info().Msg("Collection finished") + }() + + ctx, cancel := context.WithTimeout(request.Context(), handler.timeout) + + defer cancel() + + info, rawInfo, err := handler.systemCollector.Collect(ctx) + if err != nil { + logger.Error().Err(err).Msg("error occurred whilst collecting system information") + + if rawInfo != "" { + logger.Info().Msg("using raw information fallback") + bytes := []byte(rawInfo) + writeResponse(writer, bytes, &logger) + } else { + logger.Error().Msg("no raw information available") + writer.WriteHeader(http.StatusInternalServerError) + } + + return + } + + if info == nil { + logger.Error().Msgf("system information collection is nil") + writer.WriteHeader(http.StatusInternalServerError) + } + + encoder := json.NewEncoder(writer) + + err = encoder.Encode(info) + if err != nil { + logger.Error().Err(err).Msgf("error occurred whilst serializing information") + writer.WriteHeader(http.StatusInternalServerError) + } +} + +func writeResponse(writer http.ResponseWriter, bytes []byte, logger *zerolog.Logger) { + bytesWritten, err := writer.Write(bytes) + if err != nil { + logger.Error().Err(err).Msg("error occurred writing response") + } else if bytesWritten != len(bytes) { + logger.Error().Msgf("error incomplete write response %d/%d", bytesWritten, len(bytes)) + } +} diff --git a/diagnostic/handlers_test.go b/diagnostic/handlers_test.go new file mode 100644 index 00000000..984501f3 --- /dev/null +++ b/diagnostic/handlers_test.go @@ -0,0 +1,108 @@ +package diagnostic_test + +import ( + "context" + "encoding/json" + "errors" + "io" + "net/http" + "net/http/httptest" + "testing" + + "github.com/rs/zerolog" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/cloudflare/cloudflared/diagnostic" +) + +type SystemCollectorMock struct{} + +const ( + systemInformationKey = "sikey" + rawInformationKey = "rikey" + errorKey = "errkey" +) + +func setCtxValuesForSystemCollector( + systemInfo *diagnostic.SystemInformation, + rawInfo string, + err error, +) context.Context { + ctx := context.Background() + ctx = context.WithValue(ctx, systemInformationKey, systemInfo) + ctx = context.WithValue(ctx, rawInformationKey, rawInfo) + ctx = context.WithValue(ctx, errorKey, err) + + return ctx +} + +func (*SystemCollectorMock) Collect(ctx context.Context) (*diagnostic.SystemInformation, string, error) { + si, _ := ctx.Value(systemInformationKey).(*diagnostic.SystemInformation) + ri, _ := ctx.Value(rawInformationKey).(string) + err, _ := ctx.Value(errorKey).(error) + + return si, ri, err +} + +func TestSystemHandler(t *testing.T) { + t.Parallel() + + log := zerolog.Nop() + tests := []struct { + name string + systemInfo *diagnostic.SystemInformation + rawInfo string + err error + statusCode int + }{ + { + name: "happy path", + systemInfo: diagnostic.NewSystemInformation( + 0, 0, 0, 0, + "string", "string", "string", "string", + "string", "string", nil, + ), + rawInfo: "", + err: nil, + statusCode: http.StatusOK, + }, + { + name: "on error and raw info", systemInfo: nil, + rawInfo: "raw info", err: errors.New("an error"), statusCode: http.StatusOK, + }, + { + name: "on error and no raw info", systemInfo: nil, + rawInfo: "", err: errors.New("an error"), statusCode: http.StatusInternalServerError, + }, + { + name: "malformed response", systemInfo: nil, rawInfo: "", err: nil, statusCode: http.StatusInternalServerError, + }, + } + + for _, tCase := range tests { + t.Run(tCase.name, func(t *testing.T) { + t.Parallel() + handler := diagnostic.NewDiagnosticHandler(&log, 0, &SystemCollectorMock{}) + recorder := httptest.NewRecorder() + ctx := setCtxValuesForSystemCollector(tCase.systemInfo, tCase.rawInfo, tCase.err) + request, err := http.NewRequestWithContext(ctx, http.MethodGet, "/diag/syste,", nil) + require.NoError(t, err) + handler.SystemHandler(recorder, request) + + assert.Equal(t, tCase.statusCode, recorder.Code) + if tCase.statusCode == http.StatusOK && tCase.systemInfo != nil { + var response diagnostic.SystemInformation + + decoder := json.NewDecoder(recorder.Body) + err = decoder.Decode(&response) + require.NoError(t, err) + assert.Equal(t, tCase.systemInfo, &response) + } else if tCase.statusCode == http.StatusOK && tCase.rawInfo != "" { + rawBytes, err := io.ReadAll(recorder.Body) + require.NoError(t, err) + assert.Equal(t, tCase.rawInfo, string(rawBytes)) + } + }) + } +} diff --git a/diagnostic/system_collector.go b/diagnostic/system_collector.go new file mode 100644 index 00000000..08f2a47f --- /dev/null +++ b/diagnostic/system_collector.go @@ -0,0 +1,70 @@ +package diagnostic + +import "context" + +type DiskVolumeInformation struct { + Name string `json:"name"` // represents the filesystem in linux/macos or device name in windows + SizeMaximum uint64 `json:"sizeMaximum"` // represents the maximum size of the disk in kilobytes + SizeCurrent uint64 `json:"sizeCurrent"` // represents the current size of the disk in kilobytes +} + +func NewDiskVolumeInformation(name string, maximum, current uint64) *DiskVolumeInformation { + return &DiskVolumeInformation{ + name, + maximum, + current, + } +} + +type SystemInformation struct { + MemoryMaximum uint64 `json:"memoryMaximum"` // represents the maximum memory of the system in kilobytes + MemoryCurrent uint64 `json:"memoryCurrent"` // represents the system's memory in use in kilobytes + FileDescriptorMaximum uint64 `json:"fileDescriptorMaximum"` // represents the maximum number of file descriptors of the system + FileDescriptorCurrent uint64 `json:"fileDescriptorCurrent"` // represents the system's file descriptors in use + OsSystem string `json:"osSystem"` // represents the operating system name i.e.: linux, windows, darwin + HostName string `json:"hostName"` // represents the system host name + OsVersion string `json:"osVersion"` // detailed information about the system's release version level + OsRelease string `json:"osRelease"` // detailed information about the system's release + Architecture string `json:"architecture"` // represents the system's hardware platform i.e: arm64/amd64 + CloudflaredVersion string `json:"cloudflaredVersion"` // the runtime version of cloudflared + Disk []*DiskVolumeInformation `json:"disk"` +} + +func NewSystemInformation( + memoryMaximum, + memoryCurrent, + filesMaximum, + filesCurrent uint64, + osystem, + name, + osVersion, + osRelease, + architecture, + cloudflaredVersion string, + disk []*DiskVolumeInformation, +) *SystemInformation { + return &SystemInformation{ + memoryMaximum, + memoryCurrent, + filesMaximum, + filesCurrent, + osystem, + name, + osVersion, + osRelease, + architecture, + cloudflaredVersion, + disk, + } +} + +type SystemCollector interface { + // If the collection is successful it will return `SystemInformation` struct, + // an empty string, and a nil error. + // In case there is an error a string with the raw data will be returned + // however the returned string not contain all the data points. + // + // This function expects that the caller sets the context timeout to prevent + // long-lived collectors. + Collect(ctx context.Context) (*SystemInformation, string, error) +} diff --git a/diagnostic/system_collector_linux.go b/diagnostic/system_collector_linux.go new file mode 100644 index 00000000..35d3cc9b --- /dev/null +++ b/diagnostic/system_collector_linux.go @@ -0,0 +1,120 @@ +//go:build linux + +package diagnostic + +import ( + "context" + "fmt" + "os/exec" + "strconv" + "strings" +) + +type SystemCollectorImpl struct { + version string +} + +func NewSystemCollectorImpl( + version string, +) *SystemCollectorImpl { + return &SystemCollectorImpl{ + version, + } +} + +func (collector *SystemCollectorImpl) Collect(ctx context.Context) (*SystemInformation, string, error) { + memoryInfo, memoryInfoRaw, memoryInfoErr := collectMemoryInformation(ctx) + fdInfo, fdInfoRaw, fdInfoErr := collectFileDescriptorInformation(ctx) + disks, disksRaw, diskErr := collectDiskVolumeInformationUnix(ctx) + osInfo, osInfoRaw, osInfoErr := collectOSInformationUnix(ctx) + + if memoryInfoErr != nil { + raw := RawSystemInformation(osInfoRaw, memoryInfoRaw, fdInfoRaw, disksRaw) + return nil, raw, memoryInfoErr + } + + if fdInfoErr != nil { + raw := RawSystemInformation(osInfoRaw, memoryInfoRaw, fdInfoRaw, disksRaw) + return nil, raw, fdInfoErr + } + + if diskErr != nil { + raw := RawSystemInformation(osInfoRaw, memoryInfoRaw, fdInfoRaw, disksRaw) + return nil, raw, diskErr + } + + if osInfoErr != nil { + raw := RawSystemInformation(osInfoRaw, memoryInfoRaw, fdInfoRaw, disksRaw) + return nil, raw, osInfoErr + } + + return NewSystemInformation( + memoryInfo.MemoryMaximum, + memoryInfo.MemoryCurrent, + fdInfo.FileDescriptorMaximum, + fdInfo.FileDescriptorCurrent, + osInfo.OsSystem, + osInfo.Name, + osInfo.OsVersion, + osInfo.OsRelease, + osInfo.Architecture, + collector.version, + disks, + ), "", nil +} + +func collectMemoryInformation(ctx context.Context) (*MemoryInformation, string, error) { + // This function relies on the output of `cat /proc/meminfo` to retrieve + // memoryMax and memoryCurrent. + // The expected output is in the format of `KEY VALUE UNIT`. + const ( + memTotalPrefix = "MemTotal" + memAvailablePrefix = "MemAvailable" + ) + + command := exec.CommandContext(ctx, "cat", "/proc/meminfo") + + stdout, err := command.Output() + if err != nil { + return nil, "", fmt.Errorf("error retrieving output from command '%s': %w", command.String(), err) + } + + output := string(stdout) + + mapper := func(field string) (uint64, error) { + field = strings.TrimRight(field, " kB") + + return strconv.ParseUint(field, 10, 64) + } + + memoryInfo, err := ParseMemoryInformationFromKV(output, memTotalPrefix, memAvailablePrefix, mapper) + if err != nil { + return nil, output, err + } + + // returning raw output in case other collected information + // resulted in errors + return memoryInfo, output, nil +} + +func collectFileDescriptorInformation(ctx context.Context) (*FileDescriptorInformation, string, error) { + // Command retrieved from https://docs.kernel.org/admin-guide/sysctl/fs.html#file-max-file-nr. + // If the sysctl is not available the command with fail. + command := exec.CommandContext(ctx, "sysctl", "-n", "fs.file-nr") + + stdout, err := command.Output() + if err != nil { + return nil, "", fmt.Errorf("error retrieving output from command '%s': %w", command.String(), err) + } + + output := string(stdout) + + fileDescriptorInfo, err := ParseSysctlFileDescriptorInformation(output) + if err != nil { + return nil, output, err + } + + // returning raw output in case other collected information + // resulted in errors + return fileDescriptorInfo, output, nil +} diff --git a/diagnostic/system_collector_macos.go b/diagnostic/system_collector_macos.go new file mode 100644 index 00000000..9ec96cfd --- /dev/null +++ b/diagnostic/system_collector_macos.go @@ -0,0 +1,132 @@ +//go:build darwin + +package diagnostic + +import ( + "context" + "fmt" + "os/exec" + "strconv" +) + +type SystemCollectorImpl struct { + version string +} + +func NewSystemCollectorImpl( + version string, +) *SystemCollectorImpl { + return &SystemCollectorImpl{ + version, + } +} + +func (collector *SystemCollectorImpl) Collect(ctx context.Context) (*SystemInformation, string, error) { + memoryInfo, memoryInfoRaw, memoryInfoErr := collectMemoryInformation(ctx) + fdInfo, fdInfoRaw, fdInfoErr := collectFileDescriptorInformation(ctx) + disks, disksRaw, diskErr := collectDiskVolumeInformationUnix(ctx) + osInfo, osInfoRaw, osInfoErr := collectOSInformationUnix(ctx) + + if memoryInfoErr != nil { + return nil, RawSystemInformation(osInfoRaw, memoryInfoRaw, fdInfoRaw, disksRaw), memoryInfoErr + } + + if fdInfoErr != nil { + return nil, RawSystemInformation(osInfoRaw, memoryInfoRaw, fdInfoRaw, disksRaw), fdInfoErr + } + + if diskErr != nil { + return nil, RawSystemInformation(osInfoRaw, memoryInfoRaw, fdInfoRaw, disksRaw), diskErr + } + + if osInfoErr != nil { + return nil, RawSystemInformation(osInfoRaw, memoryInfoRaw, fdInfoRaw, disksRaw), osInfoErr + } + + return NewSystemInformation( + memoryInfo.MemoryMaximum, + memoryInfo.MemoryCurrent, + fdInfo.FileDescriptorMaximum, + fdInfo.FileDescriptorCurrent, + osInfo.OsSystem, + osInfo.Name, + osInfo.OsVersion, + osInfo.OsRelease, + osInfo.Architecture, + collector.version, + disks, + ), "", nil +} + +func collectFileDescriptorInformation(ctx context.Context) ( + *FileDescriptorInformation, + string, + error, +) { + const ( + fileDescriptorMaximumKey = "kern.maxfiles" + fileDescriptorCurrentKey = "kern.num_files" + ) + + command := exec.CommandContext(ctx, "sysctl", fileDescriptorMaximumKey, fileDescriptorCurrentKey) + + stdout, err := command.Output() + if err != nil { + return nil, "", fmt.Errorf("error retrieving output from command '%s': %w", command.String(), err) + } + + output := string(stdout) + + fileDescriptorInfo, err := ParseFileDescriptorInformationFromKV( + output, + fileDescriptorMaximumKey, + fileDescriptorCurrentKey, + ) + if err != nil { + return nil, output, err + } + + // returning raw output in case other collected information + // resulted in errors + return fileDescriptorInfo, output, nil +} + +func collectMemoryInformation(ctx context.Context) ( + *MemoryInformation, + string, + error, +) { + const ( + memoryMaximumKey = "hw.memsize" + memoryAvailableKey = "hw.memsize_usable" + ) + + command := exec.CommandContext( + ctx, + "sysctl", + memoryMaximumKey, + memoryAvailableKey, + ) + + stdout, err := command.Output() + if err != nil { + return nil, "", fmt.Errorf("error retrieving output from command '%s': %w", command.String(), err) + } + + output := string(stdout) + + mapper := func(field string) (uint64, error) { + const kiloBytes = 1024 + value, err := strconv.ParseUint(field, 10, 64) + return value / kiloBytes, err + } + + memoryInfo, err := ParseMemoryInformationFromKV(output, memoryMaximumKey, memoryAvailableKey, mapper) + if err != nil { + return nil, output, err + } + + // returning raw output in case other collected information + // resulted in errors + return memoryInfo, output, nil +} diff --git a/diagnostic/system_collector_test.go b/diagnostic/system_collector_test.go new file mode 100644 index 00000000..c9338740 --- /dev/null +++ b/diagnostic/system_collector_test.go @@ -0,0 +1,466 @@ +package diagnostic_test + +import ( + "strconv" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/cloudflare/cloudflared/diagnostic" +) + +func TestParseMemoryInformationFromKV(t *testing.T) { + t.Parallel() + + mapper := func(field string) (uint64, error) { + value, err := strconv.ParseUint(field, 10, 64) + return value, err + } + + linuxMapper := func(field string) (uint64, error) { + field = strings.TrimRight(field, " kB") + return strconv.ParseUint(field, 10, 64) + } + + windowsMemoryOutput := ` + +FreeVirtualMemory : 5350472 +TotalVirtualMemorySize : 8903424 + + +` + macosMemoryOutput := `hw.memsize: 38654705664 +hw.memsize_usable: 38009012224` + memoryOutputWithMissingKey := `hw.memsize: 38654705664` + + linuxMemoryOutput := `MemTotal: 8028860 kB +MemFree: 731396 kB +MemAvailable: 4678844 kB +Buffers: 472632 kB +Cached: 3186492 kB +SwapCached: 4196 kB +Active: 3088988 kB +Inactive: 3468560 kB` + + tests := []struct { + name string + output string + memoryMaximumKey string + memoryAvailableKey string + expected *diagnostic.MemoryInformation + expectedErr bool + mapper func(string) (uint64, error) + }{ + { + name: "parse linux memory values", + output: linuxMemoryOutput, + memoryMaximumKey: "MemTotal", + memoryAvailableKey: "MemAvailable", + expected: &diagnostic.MemoryInformation{ + 8028860, + 8028860 - 4678844, + }, + expectedErr: false, + mapper: linuxMapper, + }, + { + name: "parse memory values with missing key", + output: memoryOutputWithMissingKey, + memoryMaximumKey: "hw.memsize", + memoryAvailableKey: "hw.memsize_usable", + expected: nil, + expectedErr: true, + mapper: mapper, + }, + { + name: "parse macos memory values", + output: macosMemoryOutput, + memoryMaximumKey: "hw.memsize", + memoryAvailableKey: "hw.memsize_usable", + expected: &diagnostic.MemoryInformation{ + 38654705664, + 38654705664 - 38009012224, + }, + expectedErr: false, + mapper: mapper, + }, + { + name: "parse windows memory values", + output: windowsMemoryOutput, + memoryMaximumKey: "TotalVirtualMemorySize", + memoryAvailableKey: "FreeVirtualMemory", + expected: &diagnostic.MemoryInformation{ + 8903424, + 8903424 - 5350472, + }, + expectedErr: false, + mapper: mapper, + }, + } + + for _, tCase := range tests { + t.Run(tCase.name, func(t *testing.T) { + t.Parallel() + memoryInfo, err := diagnostic.ParseMemoryInformationFromKV( + tCase.output, + tCase.memoryMaximumKey, + tCase.memoryAvailableKey, + tCase.mapper, + ) + + if tCase.expectedErr { + assert.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, tCase.expected, memoryInfo) + } + }) + } +} + +func TestParseUnameOutput(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + output string + os string + expected *diagnostic.OsInfo + expectedErr bool + }{ + { + name: "darwin machine", + output: "Darwin APC 23.6.0 Darwin Kernel Version 99.6.0: Wed Jul 31 20:48:04 PDT 1997; root:xnu-66666.666.6.666.6~1/RELEASE_ARM64_T6666 arm64", + os: "darwin", + expected: &diagnostic.OsInfo{ + Architecture: "arm64", + Name: "APC", + OsSystem: "Darwin", + OsRelease: "Darwin Kernel Version 99.6.0: Wed Jul 31 20:48:04 PDT 1997; root:xnu-66666.666.6.666.6~1/RELEASE_ARM64_T6666", + OsVersion: "23.6.0", + }, + expectedErr: false, + }, + { + name: "linux machine", + output: "Linux dab00d565591 6.6.31-linuxkit #1 SMP Thu May 23 08:36:57 UTC 2024 aarch64 GNU/Linux", + os: "linux", + expected: &diagnostic.OsInfo{ + Architecture: "aarch64", + Name: "dab00d565591", + OsSystem: "Linux", + OsRelease: "#1 SMP Thu May 23 08:36:57 UTC 2024", + OsVersion: "6.6.31-linuxkit", + }, + expectedErr: false, + }, + { + name: "not enough fields", + output: "Linux ", + os: "linux", + expected: nil, + expectedErr: true, + }, + } + + for _, tCase := range tests { + t.Run(tCase.name, func(t *testing.T) { + t.Parallel() + memoryInfo, err := diagnostic.ParseUnameOutput( + tCase.output, + tCase.os, + ) + + if tCase.expectedErr { + assert.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, tCase.expected, memoryInfo) + } + }) + } +} + +func TestParseFileDescriptorInformationFromKV(t *testing.T) { + const ( + fileDescriptorMaximumKey = "kern.maxfiles" + fileDescriptorCurrentKey = "kern.num_files" + ) + + t.Parallel() + + memoryOutput := `kern.maxfiles: 276480 +kern.num_files: 11787` + memoryOutputWithMissingKey := `kern.maxfiles: 276480` + + tests := []struct { + name string + output string + expected *diagnostic.FileDescriptorInformation + expectedErr bool + }{ + { + name: "parse memory values with missing key", + output: memoryOutputWithMissingKey, + expected: nil, + expectedErr: true, + }, + { + name: "parse macos memory values", + output: memoryOutput, + expected: &diagnostic.FileDescriptorInformation{ + 276480, + 11787, + }, + expectedErr: false, + }, + } + + for _, tCase := range tests { + t.Run(tCase.name, func(t *testing.T) { + t.Parallel() + fdInfo, err := diagnostic.ParseFileDescriptorInformationFromKV( + tCase.output, + fileDescriptorMaximumKey, + fileDescriptorCurrentKey, + ) + + if tCase.expectedErr { + assert.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, tCase.expected, fdInfo) + } + }) + } +} + +func TestParseSysctlFileDescriptorInformation(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + output string + expected *diagnostic.FileDescriptorInformation + expectedErr bool + }{ + { + name: "expected output", + output: "111 0 1111111", + expected: &diagnostic.FileDescriptorInformation{ + FileDescriptorMaximum: 1111111, + FileDescriptorCurrent: 111, + }, + expectedErr: false, + }, + { + name: "not enough fields", + output: "111 111 ", + expected: nil, + expectedErr: true, + }, + } + + for _, tCase := range tests { + t.Run(tCase.name, func(t *testing.T) { + t.Parallel() + fdsInfo, err := diagnostic.ParseSysctlFileDescriptorInformation( + tCase.output, + ) + + if tCase.expectedErr { + assert.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, tCase.expected, fdsInfo) + } + }) + } +} + +func TestParseWinOperatingSystemInfo(t *testing.T) { + const ( + architecturePrefix = "OSArchitecture" + osSystemPrefix = "Caption" + osVersionPrefix = "Version" + osReleasePrefix = "BuildNumber" + namePrefix = "CSName" + ) + + t.Parallel() + + windowsIncompleteOsInfo := ` +OSArchitecture : ARM 64 bits +Caption : Microsoft Windows 11 Home +Morekeys : 121314 +CSName : UTILIZA-QO859QP +` + windowsCompleteOsInfo := ` +OSArchitecture : ARM 64 bits +Caption : Microsoft Windows 11 Home +Version : 10.0.22631 +BuildNumber : 22631 +Morekeys : 121314 +CSName : UTILIZA-QO859QP +` + + tests := []struct { + name string + output string + expected *diagnostic.OsInfo + expectedErr bool + }{ + { + name: "expected output", + output: windowsCompleteOsInfo, + expected: &diagnostic.OsInfo{ + Architecture: "ARM 64 bits", + Name: "UTILIZA-QO859QP", + OsSystem: "Microsoft Windows 11 Home", + OsRelease: "22631", + OsVersion: "10.0.22631", + }, + expectedErr: false, + }, + { + name: "missing keys", + output: windowsIncompleteOsInfo, + expected: nil, + expectedErr: true, + }, + } + + for _, tCase := range tests { + t.Run(tCase.name, func(t *testing.T) { + t.Parallel() + osInfo, err := diagnostic.ParseWinOperatingSystemInfo( + tCase.output, + architecturePrefix, + osSystemPrefix, + osVersionPrefix, + osReleasePrefix, + namePrefix, + ) + + if tCase.expectedErr { + assert.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, tCase.expected, osInfo) + } + }) + } +} + +func TestParseDiskVolumeInformationOutput(t *testing.T) { + t.Parallel() + + invalidUnixDiskVolumeInfo := `Filesystem Size Used Avail Use% Mounted on +overlay 59G 19G 38G 33% / +tmpfs 64M 0 64M 0% /dev +shm 64M 0 64M 0% /dev/shm +/run/host_mark/Users 461G 266G 195G 58% /tmp/cloudflared +/dev/vda1 59G 19G 38G 33% /etc/hosts +tmpfs 3.9G 0 3.9G 0% /sys/firmware +` + + unixDiskVolumeInfo := `Filesystem Size Used Avail Use% Mounted on +overlay 61202244 18881444 39179476 33% / +tmpfs 65536 0 65536 0% /dev +shm 65536 0 65536 0% /dev/shm +/run/host_mark/Users 482797652 278648468 204149184 58% /tmp/cloudflared +/dev/vda1 61202244 18881444 39179476 33% /etc/hosts +tmpfs 4014428 0 4014428 0% /sys/firmware` + missingFields := ` DeviceID Size +-------- ---- +C: size +E: 235563008 +Z: 67754782720 +` + invalidTypeField := ` DeviceID Size FreeSpace +-------- ---- --------- +C: size 31318736896 +D: +E: 235563008 0 +Z: 67754782720 31318732800 +` + + windowsDiskVolumeInfo := ` + +DeviceID Size FreeSpace +-------- ---- --------- +C: 67754782720 31318736896 +E: 235563008 0 +Z: 67754782720 31318732800` + + tests := []struct { + name string + output string + expected []*diagnostic.DiskVolumeInformation + skipLines int + expectedErr bool + }{ + { + name: "invalid unix disk volume information (numbers have units)", + output: invalidUnixDiskVolumeInfo, + expected: []*diagnostic.DiskVolumeInformation{}, + skipLines: 1, + expectedErr: true, + }, + { + name: "unix disk volume information", + output: unixDiskVolumeInfo, + skipLines: 1, + expected: []*diagnostic.DiskVolumeInformation{ + diagnostic.NewDiskVolumeInformation("overlay", 61202244, 18881444), + diagnostic.NewDiskVolumeInformation("tmpfs", 65536, 0), + diagnostic.NewDiskVolumeInformation("shm", 65536, 0), + diagnostic.NewDiskVolumeInformation("/run/host_mark/Users", 482797652, 278648468), + diagnostic.NewDiskVolumeInformation("/dev/vda1", 61202244, 18881444), + diagnostic.NewDiskVolumeInformation("tmpfs", 4014428, 0), + }, + expectedErr: false, + }, + { + name: "windows disk volume information", + output: windowsDiskVolumeInfo, + expected: []*diagnostic.DiskVolumeInformation{ + diagnostic.NewDiskVolumeInformation("C:", 67754782720, 31318736896), + diagnostic.NewDiskVolumeInformation("E:", 235563008, 0), + diagnostic.NewDiskVolumeInformation("Z:", 67754782720, 31318732800), + }, + skipLines: 4, + expectedErr: false, + }, + { + name: "insuficient fields", + output: missingFields, + expected: nil, + skipLines: 2, + expectedErr: true, + }, + { + name: "invalid field", + output: invalidTypeField, + expected: nil, + skipLines: 2, + expectedErr: true, + }, + } + + for _, tCase := range tests { + t.Run(tCase.name, func(t *testing.T) { + t.Parallel() + disks, err := diagnostic.ParseDiskVolumeInformationOutput(tCase.output, tCase.skipLines, 1) + + if tCase.expectedErr { + assert.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, tCase.expected, disks) + } + }) + } +} diff --git a/diagnostic/system_collector_utils.go b/diagnostic/system_collector_utils.go new file mode 100644 index 00000000..2ea17209 --- /dev/null +++ b/diagnostic/system_collector_utils.go @@ -0,0 +1,377 @@ +package diagnostic + +import ( + "context" + "fmt" + "os/exec" + "runtime" + "sort" + "strconv" + "strings" +) + +func findColonSeparatedPairs[V any](output string, keys []string, mapper func(string) (V, error)) map[string]V { + const ( + memoryField = 1 + memoryInformationFields = 2 + ) + + lines := strings.Split(output, "\n") + pairs := make(map[string]V, 0) + + // sort keys and lines to allow incremental search + sort.Strings(lines) + sort.Strings(keys) + + // keeps track of the last key found + lastIndex := 0 + + for _, line := range lines { + if lastIndex == len(keys) { + // already found all keys no need to continue iterating + // over the other values + break + } + + for index, key := range keys[lastIndex:] { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, key) { + fields := strings.Split(line, ":") + if len(fields) < memoryInformationFields { + lastIndex = index + 1 + + break + } + + field, err := mapper(strings.TrimSpace(fields[memoryField])) + if err != nil { + lastIndex = lastIndex + index + 1 + + break + } + + pairs[key] = field + lastIndex = lastIndex + index + 1 + + break + } + } + } + + return pairs +} + +func ParseDiskVolumeInformationOutput(output string, skipLines int, scale float64) ([]*DiskVolumeInformation, error) { + const ( + diskFieldsMinimum = 3 + nameField = 0 + sizeMaximumField = 1 + sizeCurrentField = 2 + ) + + disksRaw := strings.Split(output, "\n") + disks := make([]*DiskVolumeInformation, 0) + + if skipLines > len(disksRaw) || skipLines < 0 { + skipLines = 0 + } + + for _, disk := range disksRaw[skipLines:] { + if disk == "" { + // skip empty line + continue + } + + fields := strings.Fields(disk) + if len(fields) < diskFieldsMinimum { + return nil, fmt.Errorf("expected disk volume to have %d fields got %d: %w", + diskFieldsMinimum, len(fields), ErrInsuficientFields, + ) + } + + name := fields[nameField] + + sizeMaximum, err := strconv.ParseUint(fields[sizeMaximumField], 10, 64) + if err != nil { + continue + } + + sizeCurrent, err := strconv.ParseUint(fields[sizeCurrentField], 10, 64) + if err != nil { + continue + } + + diskInfo := NewDiskVolumeInformation( + name, uint64(float64(sizeMaximum)*scale), uint64(float64(sizeCurrent)*scale), + ) + disks = append(disks, diskInfo) + } + + if len(disks) == 0 { + return nil, ErrNoVolumeFound + } + + return disks, nil +} + +type OsInfo struct { + OsSystem string + Name string + OsVersion string + OsRelease string + Architecture string +} + +func ParseUnameOutput(output string, system string) (*OsInfo, error) { + const ( + osystemField = 0 + nameField = 1 + osVersionField = 2 + osReleaseStartField = 3 + osInformationFieldsMinimum = 6 + darwin = "darwin" + ) + + architectureOffset := 2 + if system == darwin { + architectureOffset = 1 + } + + fields := strings.Fields(output) + if len(fields) < osInformationFieldsMinimum { + return nil, fmt.Errorf("expected system information to have %d fields got %d: %w", + osInformationFieldsMinimum, len(fields), ErrInsuficientFields, + ) + } + + architectureField := len(fields) - architectureOffset + osystem := fields[osystemField] + name := fields[nameField] + osVersion := fields[osVersionField] + osRelease := strings.Join(fields[osReleaseStartField:architectureField], " ") + architecture := fields[architectureField] + + return &OsInfo{ + osystem, + name, + osVersion, + osRelease, + architecture, + }, nil +} + +func ParseWinOperatingSystemInfo( + output string, + architectureKey string, + osSystemKey string, + osVersionKey string, + osReleaseKey string, + nameKey string, +) (*OsInfo, error) { + identity := func(s string) (string, error) { return s, nil } + + keys := []string{architectureKey, osSystemKey, osVersionKey, osReleaseKey, nameKey} + pairs := findColonSeparatedPairs( + output, + keys, + identity, + ) + + architecture, exists := pairs[architectureKey] + if !exists { + return nil, fmt.Errorf("parsing os information: %w, key=%s", ErrKeyNotFound, architectureKey) + } + + osSystem, exists := pairs[osSystemKey] + if !exists { + return nil, fmt.Errorf("parsing os information: %w, key=%s", ErrKeyNotFound, osSystemKey) + } + + osVersion, exists := pairs[osVersionKey] + if !exists { + return nil, fmt.Errorf("parsing os information: %w, key=%s", ErrKeyNotFound, osVersionKey) + } + + osRelease, exists := pairs[osReleaseKey] + if !exists { + return nil, fmt.Errorf("parsing os information: %w, key=%s", ErrKeyNotFound, osReleaseKey) + } + + name, exists := pairs[nameKey] + if !exists { + return nil, fmt.Errorf("parsing os information: %w, key=%s", ErrKeyNotFound, nameKey) + } + + return &OsInfo{osSystem, name, osVersion, osRelease, architecture}, nil +} + +type FileDescriptorInformation struct { + FileDescriptorMaximum uint64 + FileDescriptorCurrent uint64 +} + +func ParseSysctlFileDescriptorInformation(output string) (*FileDescriptorInformation, error) { + const ( + openFilesField = 0 + maxFilesField = 2 + fileDescriptorLimitsFields = 3 + ) + + fields := strings.Fields(output) + + if len(fields) != fileDescriptorLimitsFields { + return nil, + fmt.Errorf( + "expected file descriptor information to have %d fields got %d: %w", + fileDescriptorLimitsFields, + len(fields), + ErrInsuficientFields, + ) + } + + fileDescriptorCurrent, err := strconv.ParseUint(fields[openFilesField], 10, 64) + if err != nil { + return nil, fmt.Errorf( + "error parsing files current field '%s': %w", + fields[openFilesField], + err, + ) + } + + fileDescriptorMaximum, err := strconv.ParseUint(fields[maxFilesField], 10, 64) + if err != nil { + return nil, fmt.Errorf("error parsing files max field '%s': %w", fields[maxFilesField], err) + } + + return &FileDescriptorInformation{fileDescriptorMaximum, fileDescriptorCurrent}, nil +} + +func ParseFileDescriptorInformationFromKV( + output string, + fileDescriptorMaximumKey string, + fileDescriptorCurrentKey string, +) (*FileDescriptorInformation, error) { + mapper := func(field string) (uint64, error) { + return strconv.ParseUint(field, 10, 64) + } + + pairs := findColonSeparatedPairs(output, []string{fileDescriptorMaximumKey, fileDescriptorCurrentKey}, mapper) + + fileDescriptorMaximum, exists := pairs[fileDescriptorMaximumKey] + if !exists { + return nil, fmt.Errorf( + "parsing file descriptor information: %w, key=%s", + ErrKeyNotFound, + fileDescriptorMaximumKey, + ) + } + + fileDescriptorCurrent, exists := pairs[fileDescriptorCurrentKey] + if !exists { + return nil, fmt.Errorf( + "parsing file descriptor information: %w, key=%s", + ErrKeyNotFound, + fileDescriptorCurrentKey, + ) + } + + return &FileDescriptorInformation{fileDescriptorMaximum, fileDescriptorCurrent}, nil +} + +type MemoryInformation struct { + MemoryMaximum uint64 // size in KB + MemoryCurrent uint64 // size in KB +} + +func ParseMemoryInformationFromKV( + output string, + memoryMaximumKey string, + memoryAvailableKey string, + mapper func(field string) (uint64, error), +) (*MemoryInformation, error) { + pairs := findColonSeparatedPairs(output, []string{memoryMaximumKey, memoryAvailableKey}, mapper) + + memoryMaximum, exists := pairs[memoryMaximumKey] + if !exists { + return nil, fmt.Errorf("parsing memory information: %w, key=%s", ErrKeyNotFound, memoryMaximumKey) + } + + memoryAvailable, exists := pairs[memoryAvailableKey] + if !exists { + return nil, fmt.Errorf("parsing memory information: %w, key=%s", ErrKeyNotFound, memoryAvailableKey) + } + + memoryCurrent := memoryMaximum - memoryAvailable + + return &MemoryInformation{memoryMaximum, memoryCurrent}, nil +} + +func RawSystemInformation(osInfoRaw string, memoryInfoRaw string, fdInfoRaw string, disksRaw string) string { + var builder strings.Builder + + formatInfo := func(info string, builder *strings.Builder) { + if info == "" { + builder.WriteString("No information\n") + } else { + builder.WriteString(info) + builder.WriteString("\n") + } + } + + builder.WriteString("---BEGIN Operating system information\n") + formatInfo(osInfoRaw, &builder) + builder.WriteString("---END Operating system information\n") + builder.WriteString("---BEGIN Memory information\n") + formatInfo(memoryInfoRaw, &builder) + builder.WriteString("---END Memory information\n") + builder.WriteString("---BEGIN File descriptors information\n") + formatInfo(fdInfoRaw, &builder) + builder.WriteString("---END File descriptors information\n") + builder.WriteString("---BEGIN Disks information\n") + formatInfo(disksRaw, &builder) + builder.WriteString("---END Disks information\n") + + rawInformation := builder.String() + + return rawInformation +} + +func collectDiskVolumeInformationUnix(ctx context.Context) ([]*DiskVolumeInformation, string, error) { + command := exec.CommandContext(ctx, "df", "-k") + + stdout, err := command.Output() + if err != nil { + return nil, "", fmt.Errorf("error retrieving output from command '%s': %w", command.String(), err) + } + + output := string(stdout) + + disks, err := ParseDiskVolumeInformationOutput(output, 1, 1) + if err != nil { + return nil, output, err + } + + // returning raw output in case other collected information + // resulted in errors + return disks, output, nil +} + +func collectOSInformationUnix(ctx context.Context) (*OsInfo, string, error) { + command := exec.CommandContext(ctx, "uname", "-a") + + stdout, err := command.Output() + if err != nil { + return nil, "", fmt.Errorf("error retrieving output from command '%s': %w", command.String(), err) + } + + output := string(stdout) + + osInfo, err := ParseUnameOutput(output, runtime.GOOS) + if err != nil { + return nil, output, err + } + + // returning raw output in case other collected information + // resulted in errors + return osInfo, output, nil +} diff --git a/diagnostic/system_collector_windows.go b/diagnostic/system_collector_windows.go new file mode 100644 index 00000000..0866a739 --- /dev/null +++ b/diagnostic/system_collector_windows.go @@ -0,0 +1,153 @@ +//go:build windows + +package diagnostic + +import ( + "context" + "fmt" + "os/exec" + "strconv" +) + +const kiloBytesScale = 1.0 / 1024 + +type SystemCollectorImpl struct { + version string +} + +func NewSystemCollectorImpl( + version string, +) *SystemCollectorImpl { + return &SystemCollectorImpl{ + version, + } +} +func (collector *SystemCollectorImpl) Collect(ctx context.Context) (*SystemInformation, string, error) { + memoryInfo, memoryInfoRaw, memoryInfoErr := collectMemoryInformation(ctx) + disks, disksRaw, diskErr := collectDiskVolumeInformation(ctx) + osInfo, osInfoRaw, osInfoErr := collectOSInformation(ctx) + + if memoryInfoErr != nil { + raw := RawSystemInformation(osInfoRaw, memoryInfoRaw, "", disksRaw) + return nil, raw, memoryInfoErr + } + + if diskErr != nil { + raw := RawSystemInformation(osInfoRaw, memoryInfoRaw, "", disksRaw) + return nil, raw, diskErr + } + + if osInfoErr != nil { + raw := RawSystemInformation(osInfoRaw, memoryInfoRaw, "", disksRaw) + return nil, raw, osInfoErr + } + + return NewSystemInformation( + memoryInfo.MemoryMaximum, + memoryInfo.MemoryCurrent, + // For windows we leave both the fileDescriptorMaximum and fileDescriptorCurrent with zero + // since there is no obvious way to get this information. + 0, + 0, + osInfo.OsSystem, + osInfo.Name, + osInfo.OsVersion, + osInfo.OsRelease, + osInfo.Architecture, + collector.version, + disks, + ), "", nil +} + +func collectMemoryInformation(ctx context.Context) (*MemoryInformation, string, error) { + const ( + memoryTotalPrefix = "TotalVirtualMemorySize" + memoryAvailablePrefix = "FreeVirtualMemory" + ) + + command := exec.CommandContext( + ctx, + "powershell", + "-Command", + "Get-CimInstance -Class Win32_OperatingSystem | Select-Object FreeVirtualMemory, TotalVirtualMemorySize | Format-List", + ) + + stdout, err := command.Output() + if err != nil { + return nil, "", fmt.Errorf("error retrieving output from command '%s': %w", command.String(), err) + } + + output := string(stdout) + + // the result of the command above will return values in bytes hence + // they need to be converted to kilobytes + mapper := func(field string) (uint64, error) { + value, err := strconv.ParseUint(field, 10, 64) + return uint64(float64(value) * kiloBytesScale), err + } + + memoryInfo, err := ParseMemoryInformationFromKV(output, memoryTotalPrefix, memoryAvailablePrefix, mapper) + if err != nil { + return nil, output, err + } + + // returning raw output in case other collected information + // resulted in errors + return memoryInfo, output, nil +} + +func collectDiskVolumeInformation(ctx context.Context) ([]*DiskVolumeInformation, string, error) { + + command := exec.CommandContext( + ctx, + "powershell", "-Command", "Get-CimInstance -Class Win32_LogicalDisk | Select-Object DeviceID, Size, FreeSpace") + + stdout, err := command.Output() + if err != nil { + return nil, "", fmt.Errorf("error retrieving output from command '%s': %w", command.String(), err) + } + + output := string(stdout) + + disks, err := ParseDiskVolumeInformationOutput(output, 2, kiloBytesScale) + if err != nil { + return nil, output, err + } + + // returning raw output in case other collected information + // resulted in errors + return disks, output, nil +} + +func collectOSInformation(ctx context.Context) (*OsInfo, string, error) { + const ( + architecturePrefix = "OSArchitecture" + osSystemPrefix = "Caption" + osVersionPrefix = "Version" + osReleasePrefix = "BuildNumber" + namePrefix = "CSName" + ) + + command := exec.CommandContext( + ctx, + "powershell", + "-Command", + "Get-CimInstance -Class Win32_OperatingSystem | Select-Object OSArchitecture, Caption, Version, BuildNumber, CSName | Format-List", + ) + + stdout, err := command.Output() + if err != nil { + return nil, "", fmt.Errorf("error retrieving output from command '%s': %w", command.String(), err) + } + + output := string(stdout) + + osInfo, err := ParseWinOperatingSystemInfo(output, architecturePrefix, osSystemPrefix, osVersionPrefix, osReleasePrefix, namePrefix) + if err != nil { + return nil, output, err + } + + // returning raw output in case other collected information + // resulted in errors + return osInfo, output, nil +} diff --git a/metrics/metrics.go b/metrics/metrics.go index 6aadffeb..77e5e9a6 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -15,6 +15,8 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/rs/zerolog" "golang.org/x/net/trace" + + "github.com/cloudflare/cloudflared/diagnostic" ) const ( @@ -52,6 +54,7 @@ func GetMetricsKnownAddresses(runtimeType string) [5]string { type Config struct { ReadyServer *ReadyServer + DiagnosticHandler *diagnostic.Handler QuickTunnelHostname string Orchestrator orchestrator @@ -91,6 +94,8 @@ func newMetricsHandler( }) } + router.HandleFunc("/diag/system", config.DiagnosticHandler.SystemHandler) + return router }