TUN-3208: Reduce copies and allocations on h2mux write path. Pre-allocate 16KB write buffer on the first write if possible. Use explicit byte array for chunks on write thread to avoid copying through intermediate buffer due to io.CopyN.
benchmark old ns/op new ns/op delta BenchmarkSingleStreamLargeResponseBody-8 17786594 12163494 -31.61% benchmark old allocs new allocs delta BenchmarkSingleStreamLargeResponseBody-8 17086 15869 -7.12% benchmark old bytes new bytes delta BenchmarkSingleStreamLargeResponseBody-8 58215169 21604391 -62.89%
This commit is contained in:
parent
42fe2e7266
commit
bd15c6b8c3
|
@ -7,11 +7,12 @@ import (
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/cloudflare/cloudflared/logger"
|
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"golang.org/x/net/http2"
|
"golang.org/x/net/http2"
|
||||||
"golang.org/x/net/http2/hpack"
|
"golang.org/x/net/http2/hpack"
|
||||||
"golang.org/x/sync/errgroup"
|
"golang.org/x/sync/errgroup"
|
||||||
|
|
||||||
|
"github.com/cloudflare/cloudflared/logger"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -21,6 +22,7 @@ const (
|
||||||
defaultTimeout time.Duration = 5 * time.Second
|
defaultTimeout time.Duration = 5 * time.Second
|
||||||
defaultRetries uint64 = 5
|
defaultRetries uint64 = 5
|
||||||
defaultWriteBufferMaxLen int = 1024 * 1024 // 1mb
|
defaultWriteBufferMaxLen int = 1024 * 1024 // 1mb
|
||||||
|
writeBufferInitialSize int = 16 * 1024 // 16KB
|
||||||
|
|
||||||
SettingMuxerMagic http2.SettingID = 0x42db
|
SettingMuxerMagic http2.SettingID = 0x42db
|
||||||
MuxerMagicOrigin uint32 = 0xa2e43c8b
|
MuxerMagicOrigin uint32 = 0xa2e43c8b
|
||||||
|
@ -206,9 +208,9 @@ func Handshake(
|
||||||
initialStreamWindow: m.config.DefaultWindowSize,
|
initialStreamWindow: m.config.DefaultWindowSize,
|
||||||
streamWindowMax: m.config.MaxWindowSize,
|
streamWindowMax: m.config.MaxWindowSize,
|
||||||
streamWriteBufferMaxLen: m.config.StreamWriteBufferMaxLen,
|
streamWriteBufferMaxLen: m.config.StreamWriteBufferMaxLen,
|
||||||
r: m.r,
|
r: m.r,
|
||||||
metricsUpdater: m.muxMetricsUpdater,
|
metricsUpdater: m.muxMetricsUpdater,
|
||||||
bytesRead: inBoundCounter,
|
bytesRead: inBoundCounter,
|
||||||
}
|
}
|
||||||
m.muxWriter = &MuxWriter{
|
m.muxWriter = &MuxWriter{
|
||||||
f: m.f,
|
f: m.f,
|
||||||
|
|
|
@ -138,6 +138,14 @@ func (s *MuxedStream) Write(p []byte) (int, error) {
|
||||||
if s.writeEOF {
|
if s.writeEOF {
|
||||||
return 0, io.EOF
|
return 0, io.EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// pre-allocate some space in the write buffer if possible
|
||||||
|
if buffer, ok := s.writeBuffer.(*bytes.Buffer); ok {
|
||||||
|
if buffer.Cap() == 0 {
|
||||||
|
buffer.Grow(writeBufferInitialSize)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
totalWritten := 0
|
totalWritten := 0
|
||||||
for totalWritten < len(p) {
|
for totalWritten < len(p) {
|
||||||
// If the buffer is full, block till there is more room.
|
// If the buffer is full, block till there is more room.
|
||||||
|
@ -367,7 +375,9 @@ type streamChunk struct {
|
||||||
// true if data frames should be sent
|
// true if data frames should be sent
|
||||||
sendData bool
|
sendData bool
|
||||||
eof bool
|
eof bool
|
||||||
buffer bytes.Buffer
|
|
||||||
|
buffer []byte
|
||||||
|
offset int
|
||||||
}
|
}
|
||||||
|
|
||||||
// getChunk atomically extracts a chunk of data to be written by MuxWriter.
|
// getChunk atomically extracts a chunk of data to be written by MuxWriter.
|
||||||
|
@ -385,8 +395,17 @@ func (s *MuxedStream) getChunk() *streamChunk {
|
||||||
eof: s.writeEOF && uint32(s.writeBuffer.Len()) <= s.sendWindow,
|
eof: s.writeEOF && uint32(s.writeBuffer.Len()) <= s.sendWindow,
|
||||||
}
|
}
|
||||||
// Copy at most s.sendWindow bytes, adjust the sendWindow accordingly
|
// Copy at most s.sendWindow bytes, adjust the sendWindow accordingly
|
||||||
writeLen, _ := io.CopyN(&chunk.buffer, s.writeBuffer, int64(s.sendWindow))
|
toCopy := int(s.sendWindow)
|
||||||
s.sendWindow -= uint32(writeLen)
|
if toCopy > s.writeBuffer.Len() {
|
||||||
|
toCopy = s.writeBuffer.Len()
|
||||||
|
}
|
||||||
|
|
||||||
|
if toCopy > 0 {
|
||||||
|
buf := make([]byte, toCopy)
|
||||||
|
writeLen, _ := s.writeBuffer.Read(buf)
|
||||||
|
chunk.buffer = buf[:writeLen]
|
||||||
|
s.sendWindow -= uint32(writeLen)
|
||||||
|
}
|
||||||
|
|
||||||
// Allow MuxedStream::Write() to continue, if needed
|
// Allow MuxedStream::Write() to continue, if needed
|
||||||
if s.writeBuffer.Len() < s.writeBufferMaxLen {
|
if s.writeBuffer.Len() < s.writeBufferMaxLen {
|
||||||
|
@ -421,8 +440,15 @@ func (c *streamChunk) sendDataFrame() bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *streamChunk) nextDataFrame(frameSize int) (payload []byte, endStream bool) {
|
func (c *streamChunk) nextDataFrame(frameSize int) (payload []byte, endStream bool) {
|
||||||
payload = c.buffer.Next(frameSize)
|
bytesLeft := len(c.buffer) - c.offset
|
||||||
if c.buffer.Len() == 0 {
|
if frameSize > bytesLeft {
|
||||||
|
frameSize = bytesLeft
|
||||||
|
}
|
||||||
|
nextOffset := c.offset + frameSize
|
||||||
|
payload = c.buffer[c.offset:nextOffset]
|
||||||
|
c.offset = nextOffset
|
||||||
|
|
||||||
|
if c.offset == len(c.buffer) {
|
||||||
// this is the last data frame in this chunk
|
// this is the last data frame in this chunk
|
||||||
c.sendData = false
|
c.sendData = false
|
||||||
if c.eof {
|
if c.eof {
|
||||||
|
|
Loading…
Reference in New Issue