TUN-8243: Collect metrics on the number of QUIC frames sent/received

This commit also removed the server metrics that is no longer used
This commit is contained in:
chungthuang 2024-02-16 11:28:59 +00:00 committed by Chung-Ting Huang
parent 971360d5e0
commit 34a876e4e7
3 changed files with 106 additions and 364 deletions

View File

@ -7,17 +7,6 @@ import (
"github.com/quic-go/quic-go/logging"
)
func perspectiveString(p logging.Perspective) string {
switch p {
case logging.PerspectiveClient:
return "client"
case logging.PerspectiveServer:
return "server"
default:
return ""
}
}
// Helper to convert logging.ByteCount(alias for int64) to float64 used in prometheus
func byteCountToPromCount(count logging.ByteCount) float64 {
return float64(count)

View File

@ -1,6 +1,8 @@
package quic
import (
"reflect"
"strings"
"sync"
"github.com/prometheus/client_golang/prometheus"
@ -16,10 +18,10 @@ var (
clientMetrics = struct {
totalConnections prometheus.Counter
closedConnections prometheus.Counter
sentPackets *prometheus.CounterVec
sentFrames *prometheus.CounterVec
sentBytes *prometheus.CounterVec
receivePackets *prometheus.CounterVec
receiveBytes *prometheus.CounterVec
receivedFrames *prometheus.CounterVec
receivedBytes *prometheus.CounterVec
bufferedPackets *prometheus.CounterVec
droppedPackets *prometheus.CounterVec
lostPackets *prometheus.CounterVec
@ -28,43 +30,88 @@ var (
smoothedRTT *prometheus.GaugeVec
}{
totalConnections: prometheus.NewCounter(
totalConnectionsOpts(logging.PerspectiveClient),
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: "client",
Name: "total_connections",
Help: "Number of connections initiated",
},
),
closedConnections: prometheus.NewCounter(
closedConnectionsOpts(logging.PerspectiveClient),
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: "client",
Name: "closed_connections",
Help: "Number of connections that has been closed",
},
),
sentPackets: prometheus.NewCounterVec(
sentPacketsOpts(logging.PerspectiveClient),
clientConnLabels,
sentFrames: prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: "client",
Name: "sent_frames",
Help: "Number of frames that have been sent through a connection",
},
append(clientConnLabels, "frame_type"),
),
sentBytes: prometheus.NewCounterVec(
sentBytesOpts(logging.PerspectiveClient),
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: "client",
Name: "sent_bytes",
Help: "Number of bytes that have been sent through a connection",
},
clientConnLabels,
),
receivePackets: prometheus.NewCounterVec(
receivePacketsOpts(logging.PerspectiveClient),
clientConnLabels,
receivedFrames: prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: "client",
Name: "received_frames",
Help: "Number of frames that have been received through a connection",
},
append(clientConnLabels, "frame_type"),
),
receiveBytes: prometheus.NewCounterVec(
receiveBytesOpts(logging.PerspectiveClient),
receivedBytes: prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: "client",
Name: "receive_bytes",
Help: "Number of bytes that have been received through a connection",
},
clientConnLabels,
),
bufferedPackets: prometheus.NewCounterVec(
bufferedPacketsOpts(logging.PerspectiveClient),
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: "client",
Name: "buffered_packets",
Help: "Number of bytes that have been buffered on a connection",
},
append(clientConnLabels, "packet_type"),
),
droppedPackets: prometheus.NewCounterVec(
droppedPacketsOpts(logging.PerspectiveClient),
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: "client",
Name: "dropped_packets",
Help: "Number of bytes that have been dropped on a connection",
},
append(clientConnLabels, "packet_type", "reason"),
),
lostPackets: prometheus.NewCounterVec(
lostPacketsOpts(logging.PerspectiveClient),
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: "client",
Name: "lost_packets",
Help: "Number of packets that have been lost from a connection",
},
append(clientConnLabels, "reason"),
),
minRTT: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: perspectiveString(logging.PerspectiveClient),
Subsystem: "client",
Name: "min_rtt",
Help: "Lowest RTT measured on a connection in millisec",
},
@ -73,7 +120,7 @@ var (
latestRTT: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: perspectiveString(logging.PerspectiveClient),
Subsystem: "client",
Name: "latest_rtt",
Help: "Latest RTT measured on a connection",
},
@ -82,188 +129,37 @@ var (
smoothedRTT: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: perspectiveString(logging.PerspectiveClient),
Subsystem: "client",
Name: "smoothed_rtt",
Help: "Calculated smoothed RTT measured on a connection in millisec",
},
clientConnLabels,
),
}
// The server has many QUIC connections. Adding per connection label incurs high memory cost
serverMetrics = struct {
totalConnections prometheus.Counter
closedConnections prometheus.Counter
sentPackets prometheus.Counter
sentBytes prometheus.Counter
receivePackets prometheus.Counter
receiveBytes prometheus.Counter
bufferedPackets *prometheus.CounterVec
droppedPackets *prometheus.CounterVec
lostPackets *prometheus.CounterVec
rtt prometheus.Histogram
}{
totalConnections: prometheus.NewCounter(
totalConnectionsOpts(logging.PerspectiveServer),
),
closedConnections: prometheus.NewCounter(
closedConnectionsOpts(logging.PerspectiveServer),
),
sentPackets: prometheus.NewCounter(
sentPacketsOpts(logging.PerspectiveServer),
),
sentBytes: prometheus.NewCounter(
sentBytesOpts(logging.PerspectiveServer),
),
receivePackets: prometheus.NewCounter(
receivePacketsOpts(logging.PerspectiveServer),
),
receiveBytes: prometheus.NewCounter(
receiveBytesOpts(logging.PerspectiveServer),
),
bufferedPackets: prometheus.NewCounterVec(
bufferedPacketsOpts(logging.PerspectiveServer),
[]string{"packet_type"},
),
droppedPackets: prometheus.NewCounterVec(
droppedPacketsOpts(logging.PerspectiveServer),
[]string{"packet_type", "reason"},
),
lostPackets: prometheus.NewCounterVec(
lostPacketsOpts(logging.PerspectiveServer),
[]string{"reason"},
),
rtt: prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: perspectiveString(logging.PerspectiveServer),
Name: "rtt",
Buckets: []float64{5, 10, 20, 30, 40, 50, 75, 100},
},
),
}
registerClient = sync.Once{}
registerServer = sync.Once{}
packetTooBigDropped = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(logging.PerspectiveClient),
Subsystem: "client",
Name: "packet_too_big_dropped",
Help: "Count of packets received from origin that are too big to send to the edge and are dropped as a result",
})
)
// MetricsCollector abstracts the difference between client and server metrics from connTracer
type MetricsCollector interface {
startedConnection()
closedConnection(err error)
sentPackets(logging.ByteCount)
receivedPackets(logging.ByteCount)
bufferedPackets(logging.PacketType)
droppedPackets(logging.PacketType, logging.ByteCount, logging.PacketDropReason)
lostPackets(logging.PacketLossReason)
updatedRTT(*logging.RTTStats)
}
func totalConnectionsOpts(p logging.Perspective) prometheus.CounterOpts {
var help string
if p == logging.PerspectiveClient {
help = "Number of connections initiated. For all quic metrics, client means the side initiating the connection"
} else {
help = "Number of connections accepted. For all quic metrics, server means the side accepting connections"
}
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "total_connections",
Help: help,
}
}
func closedConnectionsOpts(p logging.Perspective) prometheus.CounterOpts {
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "closed_connections",
Help: "Number of connections that has been closed",
}
}
func sentPacketsOpts(p logging.Perspective) prometheus.CounterOpts {
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "sent_packets",
Help: "Number of packets that have been sent through a connection",
}
}
func sentBytesOpts(p logging.Perspective) prometheus.CounterOpts {
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "sent_bytes",
Help: "Number of bytes that have been sent through a connection",
}
}
func receivePacketsOpts(p logging.Perspective) prometheus.CounterOpts {
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "receive_packets",
Help: "Number of packets that have been received through a connection",
}
}
func receiveBytesOpts(p logging.Perspective) prometheus.CounterOpts {
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "receive_bytes",
Help: "Number of bytes that have been received through a connection",
}
}
func bufferedPacketsOpts(p logging.Perspective) prometheus.CounterOpts {
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "buffered_packets",
Help: "Number of bytes that have been buffered on a connection",
}
}
func droppedPacketsOpts(p logging.Perspective) prometheus.CounterOpts {
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "dropped_packets",
Help: "Number of bytes that have been dropped on a connection",
}
}
func lostPacketsOpts(p logging.Perspective) prometheus.CounterOpts {
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "lost_packets",
Help: "Number of packets that have been lost from a connection",
}
}
type clientCollector struct {
index string
}
func newClientCollector(index uint8) MetricsCollector {
func newClientCollector(index uint8) *clientCollector {
registerClient.Do(func() {
prometheus.MustRegister(
clientMetrics.totalConnections,
clientMetrics.closedConnections,
clientMetrics.sentPackets,
clientMetrics.sentFrames,
clientMetrics.sentBytes,
clientMetrics.receivePackets,
clientMetrics.receiveBytes,
clientMetrics.receivedFrames,
clientMetrics.receivedBytes,
clientMetrics.bufferedPackets,
clientMetrics.droppedPackets,
clientMetrics.lostPackets,
@ -286,14 +182,12 @@ func (cc *clientCollector) closedConnection(err error) {
clientMetrics.closedConnections.Inc()
}
func (cc *clientCollector) sentPackets(size logging.ByteCount) {
clientMetrics.sentPackets.WithLabelValues(cc.index).Inc()
clientMetrics.sentBytes.WithLabelValues(cc.index).Add(byteCountToPromCount(size))
func (cc *clientCollector) sentPackets(size logging.ByteCount, frames []logging.Frame) {
cc.collectPackets(size, frames, clientMetrics.sentFrames, clientMetrics.sentBytes)
}
func (cc *clientCollector) receivedPackets(size logging.ByteCount) {
clientMetrics.receivePackets.WithLabelValues(cc.index).Inc()
clientMetrics.receiveBytes.WithLabelValues(cc.index).Add(byteCountToPromCount(size))
func (cc *clientCollector) receivedPackets(size logging.ByteCount, frames []logging.Frame) {
cc.collectPackets(size, frames, clientMetrics.receivedFrames, clientMetrics.receivedBytes)
}
func (cc *clientCollector) bufferedPackets(packetType logging.PacketType) {
@ -318,63 +212,18 @@ func (cc *clientCollector) updatedRTT(rtt *logging.RTTStats) {
clientMetrics.smoothedRTT.WithLabelValues(cc.index).Set(durationToPromGauge(rtt.SmoothedRTT()))
}
type serverCollector struct{}
func newServiceCollector() MetricsCollector {
registerServer.Do(func() {
prometheus.MustRegister(
serverMetrics.totalConnections,
serverMetrics.closedConnections,
serverMetrics.sentPackets,
serverMetrics.sentBytes,
serverMetrics.receivePackets,
serverMetrics.receiveBytes,
serverMetrics.bufferedPackets,
serverMetrics.droppedPackets,
serverMetrics.lostPackets,
serverMetrics.rtt,
)
})
return &serverCollector{}
func (cc *clientCollector) collectPackets(size logging.ByteCount, frames []logging.Frame, counter, bandwidth *prometheus.CounterVec) {
for _, frame := range frames {
counter.WithLabelValues(cc.index, frameName(frame)).Inc()
}
bandwidth.WithLabelValues(cc.index).Add(byteCountToPromCount(size))
}
func (sc *serverCollector) startedConnection() {
serverMetrics.totalConnections.Inc()
}
func (sc *serverCollector) closedConnection(err error) {
serverMetrics.closedConnections.Inc()
}
func (sc *serverCollector) sentPackets(size logging.ByteCount) {
serverMetrics.sentPackets.Inc()
serverMetrics.sentBytes.Add(byteCountToPromCount(size))
}
func (sc *serverCollector) receivedPackets(size logging.ByteCount) {
serverMetrics.receivePackets.Inc()
serverMetrics.receiveBytes.Add(byteCountToPromCount(size))
}
func (sc *serverCollector) bufferedPackets(packetType logging.PacketType) {
serverMetrics.bufferedPackets.WithLabelValues(packetTypeString(packetType)).Inc()
}
func (sc *serverCollector) droppedPackets(packetType logging.PacketType, size logging.ByteCount, reason logging.PacketDropReason) {
serverMetrics.droppedPackets.WithLabelValues(
packetTypeString(packetType),
packetDropReasonString(reason),
).Add(byteCountToPromCount(size))
}
func (sc *serverCollector) lostPackets(reason logging.PacketLossReason) {
serverMetrics.lostPackets.WithLabelValues(packetLossReasonString(reason)).Inc()
}
func (sc *serverCollector) updatedRTT(rtt *logging.RTTStats) {
latestRTT := rtt.LatestRTT()
// May return 0 if no valid updates have occurred
if latestRTT > 0 {
serverMetrics.rtt.Observe(durationToPromGauge(latestRTT))
func frameName(frame logging.Frame) string {
if frame == nil {
return "nil"
} else {
name := reflect.TypeOf(frame).Elem().Name()
return strings.TrimSuffix(name, "Frame")
}
}

View File

@ -3,7 +3,6 @@ package quic
import (
"context"
"net"
"time"
"github.com/quic-go/quic-go/logging"
"github.com/rs/zerolog"
@ -16,8 +15,6 @@ type tracer struct {
}
type tracerConfig struct {
isClient bool
// Only client has an index
index uint8
}
@ -25,67 +22,36 @@ func NewClientTracer(logger *zerolog.Logger, index uint8) func(context.Context,
t := &tracer{
logger: logger,
config: &tracerConfig{
isClient: true,
index: index,
index: index,
},
}
return t.TracerForConnection
}
func NewServerTracer(logger *zerolog.Logger) *logging.Tracer {
return &logging.Tracer{
SentPacket: func(net.Addr, *logging.Header, logging.ByteCount, []logging.Frame) {},
SentVersionNegotiationPacket: func(_ net.Addr, dest, src logging.ArbitraryLenConnectionID, _ []logging.VersionNumber) {},
DroppedPacket: func(net.Addr, logging.PacketType, logging.ByteCount, logging.PacketDropReason) {},
}
}
func (t *tracer) TracerForConnection(_ctx context.Context, _p logging.Perspective, _odcid logging.ConnectionID) *logging.ConnectionTracer {
if t.config.isClient {
return newConnTracer(newClientCollector(t.config.index))
}
return newConnTracer(newServiceCollector())
return newConnTracer(newClientCollector(t.config.index))
}
// connTracer collects connection level metrics
type connTracer struct {
metricsCollector MetricsCollector
metricsCollector *clientCollector
}
func newConnTracer(metricsCollector MetricsCollector) *logging.ConnectionTracer {
func newConnTracer(metricsCollector *clientCollector) *logging.ConnectionTracer {
tracer := connTracer{
metricsCollector: metricsCollector,
}
return &logging.ConnectionTracer{
StartedConnection: tracer.StartedConnection,
NegotiatedVersion: tracer.NegotiatedVersion,
ClosedConnection: tracer.ClosedConnection,
SentTransportParameters: tracer.SentTransportParameters,
ReceivedTransportParameters: tracer.ReceivedTransportParameters,
RestoredTransportParameters: tracer.RestoredTransportParameters,
SentLongHeaderPacket: tracer.SentLongHeaderPacket,
SentShortHeaderPacket: tracer.SentShortHeaderPacket,
ReceivedVersionNegotiationPacket: tracer.ReceivedVersionNegotiationPacket,
ReceivedRetry: tracer.ReceivedRetry,
ReceivedLongHeaderPacket: tracer.ReceivedLongHeaderPacket,
ReceivedShortHeaderPacket: tracer.ReceivedShortHeaderPacket,
BufferedPacket: tracer.BufferedPacket,
DroppedPacket: tracer.DroppedPacket,
UpdatedMetrics: tracer.UpdatedMetrics,
AcknowledgedPacket: tracer.AcknowledgedPacket,
LostPacket: tracer.LostPacket,
UpdatedCongestionState: tracer.UpdatedCongestionState,
UpdatedPTOCount: tracer.UpdatedPTOCount,
UpdatedKeyFromTLS: tracer.UpdatedKeyFromTLS,
UpdatedKey: tracer.UpdatedKey,
DroppedEncryptionLevel: tracer.DroppedEncryptionLevel,
DroppedKey: tracer.DroppedKey,
SetLossTimer: tracer.SetLossTimer,
LossTimerExpired: tracer.LossTimerExpired,
LossTimerCanceled: tracer.LossTimerCanceled,
ECNStateUpdated: tracer.ECNStateUpdated,
Close: tracer.Close,
Debug: tracer.Debug,
StartedConnection: tracer.StartedConnection,
ClosedConnection: tracer.ClosedConnection,
SentLongHeaderPacket: tracer.SentLongHeaderPacket,
SentShortHeaderPacket: tracer.SentShortHeaderPacket,
ReceivedLongHeaderPacket: tracer.ReceivedLongHeaderPacket,
ReceivedShortHeaderPacket: tracer.ReceivedShortHeaderPacket,
BufferedPacket: tracer.BufferedPacket,
DroppedPacket: tracer.DroppedPacket,
UpdatedMetrics: tracer.UpdatedMetrics,
LostPacket: tracer.LostPacket,
}
}
@ -97,14 +63,6 @@ func (ct *connTracer) ClosedConnection(err error) {
ct.metricsCollector.closedConnection(err)
}
func (ct *connTracer) SentPacket(hdr *logging.ExtendedHeader, packetSize logging.ByteCount, ack *logging.AckFrame, frames []logging.Frame) {
ct.metricsCollector.sentPackets(packetSize)
}
func (ct *connTracer) ReceivedPacket(hdr *logging.ExtendedHeader, size logging.ByteCount, frames []logging.Frame) {
ct.metricsCollector.receivedPackets(size)
}
func (ct *connTracer) BufferedPacket(pt logging.PacketType, size logging.ByteCount) {
ct.metricsCollector.bufferedPackets(pt)
}
@ -121,74 +79,20 @@ func (ct *connTracer) UpdatedMetrics(rttStats *logging.RTTStats, cwnd, bytesInFl
ct.metricsCollector.updatedRTT(rttStats)
}
func (ct *connTracer) NegotiatedVersion(chosen logging.VersionNumber, clientVersions, serverVersions []logging.VersionNumber) {
}
func (ct *connTracer) SentTransportParameters(parameters *logging.TransportParameters) {
}
func (ct *connTracer) ReceivedTransportParameters(parameters *logging.TransportParameters) {
}
func (ct *connTracer) RestoredTransportParameters(parameters *logging.TransportParameters) {
}
func (ct *connTracer) SentLongHeaderPacket(hdr *logging.ExtendedHeader, size logging.ByteCount, ecn logging.ECN, ack *logging.AckFrame, frames []logging.Frame) {
ct.metricsCollector.sentPackets(size, frames)
}
func (ct *connTracer) SentShortHeaderPacket(hdr *logging.ShortHeader, size logging.ByteCount, ecn logging.ECN, ack *logging.AckFrame, frames []logging.Frame) {
}
func (ct *connTracer) ReceivedVersionNegotiationPacket(dest, src logging.ArbitraryLenConnectionID, _ []logging.VersionNumber) {
}
func (ct *connTracer) ReceivedRetry(header *logging.Header) {
ct.metricsCollector.sentPackets(size, frames)
}
func (ct *connTracer) ReceivedLongHeaderPacket(hdr *logging.ExtendedHeader, size logging.ByteCount, ecn logging.ECN, frames []logging.Frame) {
ct.metricsCollector.receivedPackets(size, frames)
}
func (ct *connTracer) ReceivedShortHeaderPacket(hdr *logging.ShortHeader, size logging.ByteCount, ecn logging.ECN, frames []logging.Frame) {
}
func (ct *connTracer) AcknowledgedPacket(level logging.EncryptionLevel, number logging.PacketNumber) {
}
func (ct *connTracer) UpdatedCongestionState(state logging.CongestionState) {
}
func (ct *connTracer) UpdatedPTOCount(value uint32) {
}
func (ct *connTracer) UpdatedKeyFromTLS(level logging.EncryptionLevel, perspective logging.Perspective) {
}
func (ct *connTracer) UpdatedKey(generation logging.KeyPhase, remote bool) {
}
func (ct *connTracer) DroppedEncryptionLevel(level logging.EncryptionLevel) {
}
func (ct *connTracer) DroppedKey(generation logging.KeyPhase) {
}
func (ct *connTracer) SetLossTimer(timerType logging.TimerType, level logging.EncryptionLevel, time time.Time) {
}
func (ct *connTracer) LossTimerExpired(timerType logging.TimerType, level logging.EncryptionLevel) {
}
func (ct *connTracer) LossTimerCanceled() {
}
func (ct *connTracer) ECNStateUpdated(state logging.ECNState, trigger logging.ECNStateTrigger) {
}
func (ct *connTracer) Close() {
}
func (ct *connTracer) Debug(name, msg string) {
ct.metricsCollector.receivedPackets(size, frames)
}
type quicLogger struct {